gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2020 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* Work around tree-optimization/91825.  */
  22 #pragma GCC diagnostic warning "-Wmaybe-uninitialized"
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "backend.h"
  28 #include "target.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "predict.h"
  32 #include "memmodel.h"
  33 #include "tm_p.h"
  34 #include "expmed.h"
  35 #include "optabs.h"
  36 #include "regs.h"
  37 #include "emit-rtl.h"
  38 #include "diagnostic-core.h"
  39 #include "fold-const.h"
  40 #include "stor-layout.h"
  41 #include "dojump.h"
  42 #include "explow.h"
  43 #include "expr.h"
  44 #include "langhooks.h"
  45 #include "tree-vector-builder.h"
  46
     /* The default target_expmed instance.  When SWITCHABLE_TARGET is
        nonzero, this_target_expmed is defined here and initially points
        at that default instance.  */
  47 struct target_expmed default_target_expmed;
  48 #if SWITCHABLE_TARGET
  49 struct target_expmed *this_target_expmed = &default_target_expmed;
  50 #endif
  51
     /* Forward declarations of file-local (static) helpers: bit-field
        store and extract routines, a shifted-constant builder, a
        compare-and-jump emitter, and the signed power-of-two modulus and
        division expanders.  */
  52 static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
  53                                       unsigned HOST_WIDE_INT,
  54                                       unsigned HOST_WIDE_INT,
  55                                       poly_uint64, poly_uint64,
  56                                       machine_mode, rtx, bool, bool);
  57 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
  58                                    unsigned HOST_WIDE_INT,
  59                                    unsigned HOST_WIDE_INT,
  60                                    poly_uint64, poly_uint64,
  61                                    rtx, scalar_int_mode, bool);
  62 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
  63                                      unsigned HOST_WIDE_INT,
  64                                      unsigned HOST_WIDE_INT,
  65                                      rtx, scalar_int_mode, bool);
  66 static void store_split_bit_field (rtx, opt_scalar_int_mode,
  67                                    unsigned HOST_WIDE_INT,
  68                                    unsigned HOST_WIDE_INT,
  69                                    poly_uint64, poly_uint64,
  70                                    rtx, scalar_int_mode, bool);
  71 static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
  72                                        unsigned HOST_WIDE_INT,
  73                                        unsigned HOST_WIDE_INT, int, rtx,
  74                                        machine_mode, machine_mode, bool, bool);
  75 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
  76                                     unsigned HOST_WIDE_INT,
  77                                     unsigned HOST_WIDE_INT, rtx, int, bool);
  78 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
  79                                       unsigned HOST_WIDE_INT,
  80                                       unsigned HOST_WIDE_INT, rtx, int, bool);
  81 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
  82 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
  83                                     unsigned HOST_WIDE_INT,
  84                                     unsigned HOST_WIDE_INT, int, bool);
  85 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
  86 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
  87 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
  88
  89 /* Build the constant rtx of mode MODE for a mask made of BITSIZE one
  90    bits with BITPOS zero bits below them.  If COMPLEMENT, return the
  91    inverse of that mask instead.  Bits that do not fit in MODE are
  92    dropped, and a mask narrower than MODE is zero-extended.  */
  93
  94 static inline rtx
  95 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
  96 {
  97   wide_int mask = wi::shifted_mask (bitpos, bitsize, complement,
  98                                     GET_MODE_PRECISION (mode));
  99   return immed_wide_int_const (mask, mode);
 100 }
 101
 102 /* Test whether a value is zero or a power of two.  The argument X is
        expanded twice, so it must not have side effects.  */
 103 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
 104   (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
 105
     /* RTL expression templates that init_expmed builds once.  Their modes
        (and some operands) are then rewritten in place by
        init_expmed_one_mode and init_expmed_one_conv each time a cost is
        requested from set_src_cost.  */
 106 struct init_expmed_rtl
 107 {
 108   rtx reg;		/* Raw REG used as the operand of the templates.  */
 109   rtx plus;		/* (plus reg reg)  */
 110   rtx neg;		/* (neg reg)  */
 111   rtx mult;		/* (mult reg reg)  */
 112   rtx sdiv;		/* (div reg reg)  */
 113   rtx udiv;		/* (udiv reg reg)  */
 114   rtx sdiv_32;		/* (div reg pow2[5]), i.e. signed division by 32.  */
 115   rtx smod_32;		/* (mod reg pow2[5]), i.e. signed modulus by 32.  */
 116   rtx wide_mult;	/* (mult zext zext)  */
 117   rtx wide_lshr;	/* (lshiftrt wide_mult X); X is replaced per mode.  */
 118   rtx wide_trunc;	/* (truncate wide_lshr)  */
 119   rtx shift;		/* (ashift reg X); X is replaced with cint[m].  */
 120   rtx shift_mult;	/* (mult reg X); X is replaced with pow2[m].  */
 121   rtx shift_add;	/* (plus shift_mult reg)  */
 122   rtx shift_sub0;	/* (minus shift_mult reg)  */
 123   rtx shift_sub1;	/* (minus reg shift_mult)  */
 124   rtx zext;		/* (zero_extend reg)  */
 125   rtx trunc;		/* (truncate reg)  */
 126
     /* For 1 <= m < MAX_BITS_PER_WORD, pow2[m] is the constant 1 << m and
        cint[m] is the constant m.  Entry 0 is left null by init_expmed.  */
 127   rtx pow2[MAX_BITS_PER_WORD];
 128   rtx cint[MAX_BITS_PER_WORD];
 129 };
 130
 131 /* Record, via set_convert_cost, what converting FROM_MODE to TO_MODE
 132    costs for SPEED.  ALL supplies the RTL templates that get priced.  */
 133
 134 static void
 135 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
 136                       scalar_int_mode from_mode, bool speed)
 137 {
 138   int dst_bits = GET_MODE_PRECISION (to_mode);
 139   int src_bits = GET_MODE_PRECISION (from_mode);
 140
 141   /* A partial-integer mode normally has a smaller precision than the
 142      full integer mode that stores it.  Should its precision be a power
 143      of two all the same, take one bit off for the purposes of the
 144      comparison below.  */
 145   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT && pow2p_hwi (dst_bits))
 146     dst_bits--;
 147   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT && pow2p_hwi (src_bits))
 148     src_bits--;
 149
 150   /* Narrowing is priced as a truncation and anything else as a zero
 151      extension; sign extension is assumed to cost the same.  */
 152   rtx pattern = dst_bits < src_bits ? all->trunc : all->zext;
 153
 154   /* The shared operand has to be in FROM_MODE while the pattern is
 155      priced; it is put back into TO_MODE afterwards.  */
 156   PUT_MODE (all->reg, from_mode);
 157   int cost = set_src_cost (pattern, to_mode, speed);
 158   set_convert_cost (to_mode, from_mode, speed, cost);
 159   PUT_MODE (all->reg, to_mode);
 160 }
 161
     /* Compute and record, for SPEED, the cost entries of MODE: addition,
        negation, multiplication, signed and unsigned division, whether
        signed division and modulus by a power of two count as cheap, and
        the per-shift-count costs of shift, shift-add and shift-subtract.
        For scalar integer modes also record the conversion costs from
        every mode in MIN_MODE_INT .. MAX_MODE_INT and, when a wider
        MODE_INT mode exists, the widening and highpart multiply costs.

        ALL holds the shared RTL templates; their modes and some operands
        are overwritten here.  */
 162 static void
 163 init_expmed_one_mode (struct init_expmed_rtl *all,
 164                       machine_mode mode, int speed)
 165 {
 166   int m, n, mode_bitsize;
 167   machine_mode mode_from;
 168
 169   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 170
     /* Retarget the templates to MODE.  wide_mult and wide_lshr are not
        touched here; they are only set up in the widening case below.  */
 171   PUT_MODE (all->reg, mode);
 172   PUT_MODE (all->plus, mode);
 173   PUT_MODE (all->neg, mode);
 174   PUT_MODE (all->mult, mode);
 175   PUT_MODE (all->sdiv, mode);
 176   PUT_MODE (all->udiv, mode);
 177   PUT_MODE (all->sdiv_32, mode);
 178   PUT_MODE (all->smod_32, mode);
 179   PUT_MODE (all->wide_trunc, mode);
 180   PUT_MODE (all->shift, mode);
 181   PUT_MODE (all->shift_mult, mode);
 182   PUT_MODE (all->shift_add, mode);
 183   PUT_MODE (all->shift_sub0, mode);
 184   PUT_MODE (all->shift_sub1, mode);
 185   PUT_MODE (all->zext, mode);
 186   PUT_MODE (all->trunc, mode);
 187
 188   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
 189   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
 190   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
 191   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
 192   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
 193
     /* Division by 32 is "cheap" if it costs at most two additions, and
        modulus by 32 if it costs at most four.  */
 194   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
 195                                      <= 2 * add_cost (speed, mode)));
 196   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
 197                                      <= 4 * add_cost (speed, mode)));
 198
     /* Shift count zero: the shift itself is recorded as free, and the
        shift-add and shift-subtract forms as costing one addition.  */
 199   set_shift_cost (speed, mode, 0, 0);
 200   {
 201     int cost = add_cost (speed, mode);
 202     set_shiftadd_cost (speed, mode, 0, cost);
 203     set_shiftsub0_cost (speed, mode, 0, cost);
 204     set_shiftsub1_cost (speed, mode, 0, cost);
 205   }
 206
     /* Remaining shift counts, below both MAX_BITS_PER_WORD and the unit
        bit size of MODE: plug the count into the shift template and
        1 << count into the multiply shared by the shift-add/sub
        templates, then price each of them.  */
 207   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 208   for (m = 1; m < n; m++)
 209     {
 210       XEXP (all->shift, 1) = all->cint[m];
 211       XEXP (all->shift_mult, 1) = all->pow2[m];
 212
 213       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
 214       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
 215                                                        speed));
 216       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
 217                                                         speed));
 218       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
 219                                                         speed));
 220     }
 221
     /* Conversions and widening multiplies are only costed when MODE is
        a scalar integer mode.  */
 222   scalar_int_mode int_mode_to;
 223   if (is_a <scalar_int_mode> (mode, &int_mode_to))
 224     {
 225       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 226            mode_from = (machine_mode)(mode_from + 1))
 227         init_expmed_one_conv (all, int_mode_to,
 228                               as_a <scalar_int_mode> (mode_from), speed);
 229
 230       scalar_int_mode wider_mode;
 231       if (GET_MODE_CLASS (int_mode_to) == MODE_INT
 232           && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
 233         {
     	  /* Operands stay in MODE, are zero-extended and multiplied in
     	     WIDER_MODE, and the product is shifted right by the bit
     	     size of MODE.  wide_trunc, still in MODE, is the truncation
     	     of that shift, i.e. the high part of the product.  */
 234           PUT_MODE (all->reg, mode);
 235           PUT_MODE (all->zext, wider_mode);
 236           PUT_MODE (all->wide_mult, wider_mode);
 237           PUT_MODE (all->wide_lshr, wider_mode);
 238           XEXP (all->wide_lshr, 1)
 239             = gen_int_shift_amount (wider_mode, mode_bitsize);
 240
 241           set_mul_widen_cost (speed, wider_mode,
 242                               set_src_cost (all->wide_mult, wider_mode, speed));
 243           set_mul_highpart_cost (speed, int_mode_to,
 244                                  set_src_cost (all->wide_trunc,
 245                                                int_mode_to, speed));
 246         }
 247     }
 248 }
 249
     /* Initialize the cost data used by this file.  Build one set of RTL
        templates, then for SPEED equal to 0 and 1 record the cost of
        zero and call init_expmed_one_mode for every mode in the MODE_INT
        range and, where the target has them, the MODE_PARTIAL_INT and
        MODE_VECTOR_INT ranges.  Afterwards clear the alg_hash entries
        if the hash was already in use (otherwise just mark it used),
        restore the default RTL profile and free the templates.  */
 250 void
 251 init_expmed (void)
 252 {
 253   struct init_expmed_rtl all;
 254   machine_mode mode = QImode;
 255   int m, speed;
 256
     /* Zero everything first, so entry 0 of pow2[] and cint[], which the
        loop below skips, stays null.  */
 257   memset (&all, 0, sizeof all);
 258   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 259     {
 260       all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
 261       all.cint[m] = GEN_INT (m);
 262     }
 263
 264   /* Avoid using hard regs in ways which may be unsupported.  */
 265   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 266   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 267   all.neg = gen_rtx_NEG (mode, all.reg);
 268   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 269   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 270   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 271   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 272   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 273   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 274   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 275   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 276   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 277   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 278   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 279   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 280   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 281   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 282   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 283
 284   for (speed = 0; speed < 2; speed++)
 285     {
 286       crtl->maybe_hot_insn_p = speed;
     	  /* NOTE(review): MODE doubles as the loop variable below, so on
     	     the second pass it no longer holds QImode but whatever value
     	     the last mode loop left in it -- confirm this is intended.  */
 287       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
 288
 289       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 290            mode = (machine_mode)(mode + 1))
 291         init_expmed_one_mode (&all, mode, speed);
 292
     	  /* A MIN_MODE_* value of VOIDmode is taken to mean that the
     	     mode class is empty.  */
 293       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 294         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 295              mode = (machine_mode)(mode + 1))
 296           init_expmed_one_mode (&all, mode, speed);
 297
 298       if (MIN_MODE_VECTOR_INT != VOIDmode)
 299         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 300              mode = (machine_mode)(mode + 1))
 301           init_expmed_one_mode (&all, mode, speed);
 302     }
 303
     /* If the alg_hash was already in use, wipe all of its entries;
        otherwise just mark it as used from now on.  */
 304   if (alg_hash_used_p ())
 305     {
 306       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 307       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 308     }
 309   else
 310     set_alg_hash_used_p (true);
 311   default_rtl_profile ();
 312
     /* Release the templates in the reverse of their creation order.  */
 313   ggc_free (all.trunc);
 314   ggc_free (all.shift_sub1);
 315   ggc_free (all.shift_sub0);
 316   ggc_free (all.shift_add);
 317   ggc_free (all.shift_mult);
 318   ggc_free (all.shift);
 319   ggc_free (all.wide_trunc);
 320   ggc_free (all.wide_lshr);
 321   ggc_free (all.wide_mult);
 322   ggc_free (all.zext);
 323   ggc_free (all.smod_32);
 324   ggc_free (all.sdiv_32);
 325   ggc_free (all.udiv);
 326   ggc_free (all.sdiv);
 327   ggc_free (all.mult);
 328   ggc_free (all.neg);
 329   ggc_free (all.plus);
 330   ggc_free (all.reg);
 331 }
 332
 333 /* Negate X and return the resulting rtx.  MODE is the mode the result
 334    is meant to have, which matters when X is a CONST_INT.  */
 335
 336 rtx
 337 negate_rtx (machine_mode mode, rtx x)
 338 {
 339   /* Fold the negation if possible; failing that, expand a NEG
 340      operation through neg_optab.  */
 341   rtx folded = simplify_unary_operation (NEG, mode, x, mode);
 342   if (folded)
 343     return folded;
 344
 345   return expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 346 }
 347
 348 /* Tri-state cache: -1 until the check below has run, then 1 if reverse
 349    storage order is supported on the target and 0 if it is not.  */
 350 static int reverse_storage_order_supported = -1;
 351
 352 /* Fill in reverse_storage_order_supported, issuing a "sorry" diagnostic
 353    when byte and word endianness disagree.  */
 354
 355 static void
 356 check_reverse_storage_order_support (void)
 357 {
 358   const bool supported = (BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN);
 359
 360   reverse_storage_order_supported = supported ? 1 : 0;
 361   if (!supported)
 362     sorry ("reverse scalar storage order");
 363 }
 364
 365 /* Tri-state cache for floating-point modes: -1 until the check below
 366    has run, then 1 if reverse FP storage order is supported, else 0.  */
 367 static int reverse_float_storage_order_supported = -1;
 368
 369 /* Fill in reverse_float_storage_order_supported, issuing a "sorry"
 370    diagnostic when float-word and word endianness disagree.  */
 371
 372 static void
 373 check_reverse_float_storage_order_support (void)
 374 {
 375   const bool supported = (FLOAT_WORDS_BIG_ENDIAN == WORDS_BIG_ENDIAN);
 376
 377   reverse_float_storage_order_supported = supported ? 1 : 0;
 378   if (!supported)
 379     sorry ("reverse floating-point scalar storage order");
 380 }
 381
 382 /* Return an rtx holding the value of X with its storage order
 383    reversed.  MODE is the mode the result is meant to have, which
 384    matters when X is a CONST_INT.  */
 385
 386 rtx
 387 flip_storage_order (machine_mode mode, rtx x)
 388 {
 389   /* A QImode value is returned unchanged.  */
 390   if (mode == QImode)
 391     return x;
 392
 393   /* Complex values are flipped one part at a time.  */
 394   if (COMPLEX_MODE_P (mode))
 395     {
 396       const scalar_mode part_mode = GET_MODE_INNER (mode);
 397       rtx re_part = read_complex_part (x, false);
 398       rtx im_part = read_complex_part (x, true);
 399
 400       re_part = flip_storage_order (part_mode, re_part);
 401       im_part = flip_storage_order (part_mode, im_part);
 402       return gen_rtx_CONCAT (mode, re_part, im_part);
 403     }
 404
 405   /* Run the target support check the first time through.  */
 406   if (__builtin_expect (reverse_storage_order_supported < 0, 0))
 407     check_reverse_storage_order_support ();
 408
 409   scalar_int_mode int_mode;
 410   if (!is_a <scalar_int_mode> (mode, &int_mode))
 411     {
 412       if (FLOAT_MODE_P (mode)
 413           && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
 414         check_reverse_float_storage_order_support ();
 415
 416       /* Other modes are byte-swapped through the integer mode of the
 417          same precision, provided there is one.  */
 418       if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode))
 419         {
 420           sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
 421           return x;
 422         }
 423       x = gen_lowpart (int_mode, x);
 424     }
 425
 426   rtx swapped = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
 427   if (!swapped)
 428     swapped = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
 429
 430   return int_mode == mode ? swapped : gen_lowpart (mode, swapped);
 431 }
 432
 433 /* Return a reference into bitfield memory MEM for the BITSIZE-bit field
 434    at bit position BITNUM: the first MODE-sized unit that contains the
 435    field if MODE is set, else a BLKmode reference to every byte of it.
 436    *NEW_BITNUM receives the field's bit position in the new memory.  */
 437
 438 static rtx
 439 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
 440                       unsigned HOST_WIDE_INT bitsize,
 441                       unsigned HOST_WIDE_INT bitnum,
 442                       unsigned HOST_WIDE_INT *new_bitnum)
 443 {
 444   scalar_int_mode imode;
 445   if (!mode.exists (&imode))
 446     {
 447       /* No mode: address exactly the bytes that the field overlaps.  */
 448       const unsigned HOST_WIDE_INT bit_in_byte = bitnum % BITS_PER_UNIT;
 449       HOST_WIDE_INT first_byte = bitnum / BITS_PER_UNIT;
 450       HOST_WIDE_INT nbytes = ((bit_in_byte + bitsize + BITS_PER_UNIT - 1)
 451                               / BITS_PER_UNIT);
 452       *new_bitnum = bit_in_byte;
 453       return adjust_bitfield_address_size (mem, BLKmode, first_byte, nbytes);
 454     }
 455
 456   unsigned int unit = GET_MODE_BITSIZE (imode);
 457   const unsigned HOST_WIDE_INT bit_in_unit = bitnum % unit;
 458   HOST_WIDE_INT byte_offset = (bitnum - bit_in_unit) / BITS_PER_UNIT;
 459   *new_bitnum = bit_in_unit;
 460   return adjust_bitfield_address (mem, imode, byte_offset);
 461 }
 462
 463 /* The caller wants to use insertion or extraction PATTERN on a
 464    bitfield of BITSIZE bits located BITNUM bits into memory operand
 465    OP0.  BITREGION_START and BITREGION_END are as for store_bit_field,
 466    and FIELDMODE is the natural mode of the field.
 467
 468    Look for a mode that satisfies the memory access restrictions and,
 469    where applicable, suits a register insertion or extraction.  On
 470    success return the new memory and store the adjusted bit position
 471    in *NEW_BITNUM; otherwise return null.  */
 472
 473 static rtx
 474 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 475                               rtx op0, HOST_WIDE_INT bitsize,
 476                               HOST_WIDE_INT bitnum,
 477                               poly_uint64 bitregion_start,
 478                               poly_uint64 bitregion_end,
 479                               machine_mode fieldmode,
 480                               unsigned HOST_WIDE_INT *new_bitnum)
 481 {
 482   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 483                                 bitregion_end, MEM_ALIGN (op0),
 484                                 MEM_VOLATILE_P (op0));
 485
 486   /* Give up if the iterator offers no usable mode at all.  */
 487   scalar_int_mode best_mode;
 488   if (!iter.next_mode (&best_mode))
 489     return NULL_RTX;
 490
 491   /* BEST_MODE works.  Unless smaller modes are preferred, keep taking
 492      wider candidates: other things being equal, the widest mode tends
 493      to expose more CSE opportunities.  */
 494   if (!iter.prefer_smaller_modes ())
 495     {
 496       /* Do not go beyond the mode wanted by the matching register
 497          insertion or extraction instruction, if there is one, or
 498          beyond word_mode otherwise.  */
 499       extraction_insn insn;
 500       scalar_int_mode limit_mode = word_mode;
 501       if (get_best_reg_extraction_insn (&insn, pattern,
 502                                         GET_MODE_BITSIZE (best_mode),
 503                                         fieldmode))
 504         limit_mode = insn.field_mode;
 505
 506       scalar_int_mode candidate;
 507       while (iter.next_mode (&candidate)
 508              && GET_MODE_SIZE (candidate) <= GET_MODE_SIZE (limit_mode))
 509         best_mode = candidate;
 510     }
 511   return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, new_bitnum);
 512 }
 513
 514 /* Return true if a bitfield of BITSIZE bits at bit number BITNUM in a
 515    structure of mode STRUCT_MODE is a lowpart subreg.  The subreg
 516    offset is then BITNUM / BITS_PER_UNIT.  */
 517
 518 static bool
 519 lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
 520                      machine_mode struct_mode)
 521 {
 522   poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
 523   if (!BYTES_BIG_ENDIAN)
 524     return multiple_p (bitnum, regsize * BITS_PER_UNIT);
 525
 526   poly_uint64 field_end = bitnum + bitsize;
 527   return (multiple_p (bitnum, BITS_PER_UNIT)
 528           && (known_eq (field_end, GET_MODE_BITSIZE (struct_mode))
 529               || multiple_p (field_end, regsize * BITS_PER_UNIT)));
 530 }
 531
 532 /* Return true if -fstrict-volatile-bitfields applies to an access of
 533    OP0 covering BITSIZE bits from BITNUM with field mode FIELDMODE.
 534    Return false if the access would touch memory outside the range
 535    BITREGION_START to BITREGION_END (C++ memory model conformance).  */
 536
 537 static bool
 538 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 539                             unsigned HOST_WIDE_INT bitnum,
 540                             scalar_int_mode fieldmode,
 541                             poly_uint64 bitregion_start,
 542                             poly_uint64 bitregion_end)
 543 {
 544   /* The option must be enabled and OP0 must be a volatile MEM.  */
 545   if (flag_strict_volatile_bitfields <= 0
 546       || !MEM_P (op0)
 547       || !MEM_VOLATILE_P (op0))
 548     return false;
 549
 550   const unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 551
 552   /* The field must fit in the field mode, and the field mode must fit
 553      in a word.  */
 554   if (modesize < bitsize || BITS_PER_WORD < modesize)
 555     return false;
 556
 557   /* A field that straddles two MODESIZE units would have to be split.  */
 558   const unsigned HOST_WIDE_INT bit_in_unit = bitnum % modesize;
 559   if (bit_in_unit + bitsize > modesize)
 560     return false;
 561
 562   /* The memory must be aligned enough for a MODESIZE access, which
 563      also guarantees that the access does not touch anything after the
 564      end of the structure.  */
 565   if (MEM_ALIGN (op0) < modesize)
 566     return false;
 567
 568   /* With a bit region (C++ memory model), the MODESIZE unit holding
 569      the field must lie entirely inside it.  */
 570   const unsigned HOST_WIDE_INT unit_start = bitnum - bit_in_unit;
 571   if (maybe_ne (bitregion_end, 0U)
 572       && (maybe_lt (unit_start, bitregion_start)
 573           || maybe_gt (unit_start + modesize - 1, bitregion_end)))
 574     return false;
 575
 576   return true;
 577 }
 578
 579 /* Return true if OP0 is a memory whose BITSIZE-bit field at BITNUM can
 580    be treated as a plain MODE value; *BYTENUM gets its byte offset.  */
 581
 582 static bool
 583 simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
 584                        machine_mode mode, poly_uint64 *bytenum)
 585 {
 586   if (!MEM_P (op0)
 587       || !multiple_p (bitnum, BITS_PER_UNIT, bytenum)
 588       || !known_eq (bitsize, GET_MODE_BITSIZE (mode)))
 589     return false;
 590   return (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
 591           || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
 592               && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode)));
 593 }
 594 \f
 595 /* Try to use instruction INSV to store VALUE into a field of OP0.
 596    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
 597    BLKmode MEM.  VALUE_MODE is the mode of VALUE.  BITSIZE and BITNUM
 598    are as for store_bit_field.
     
        Return true if the insertion was expanded.  Otherwise return false;
        any insns emitted by this function up to that point are deleted
        again.  */
 599
 600 static bool
 601 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 602                             opt_scalar_int_mode op0_mode,
 603                             unsigned HOST_WIDE_INT bitsize,
 604                             unsigned HOST_WIDE_INT bitnum,
 605                             rtx value, scalar_int_mode value_mode)
 606 {
 607   class expand_operand ops[4];
 608   rtx value1;
 609   rtx xop0 = op0;
     /* Remember where we started so that a failed attempt can be undone.  */
 610   rtx_insn *last = get_last_insn ();
 611   bool copy_back = false;
 612
     /* Give up on an empty field or one wider than the field mode of
        INSV.  */
 613   scalar_int_mode op_mode = insv->field_mode;
 614   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 615   if (bitsize == 0 || bitsize > unit)
 616     return false;
 617
 618   if (MEM_P (xop0))
 619     /* Get a reference to the first byte of the field.  */
 620     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 621                                  &bitnum);
 622   else
 623     {
 624       /* Convert from counting within OP0 to counting in OP_MODE.  */
 625       if (BYTES_BIG_ENDIAN)
 626         bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
 627
 628       /* If xop0 is a register, we need it in OP_MODE
 629          to make it acceptable to the format of insv.  */
 630       if (GET_CODE (xop0) == SUBREG)
 631         /* We can't just change the mode, because this might clobber op0,
 632            and we will need the original value of op0 if insv fails.  */
 633         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 634       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 635         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 636     }
 637
 638   /* If the destination is a paradoxical subreg such that we need a
 639      truncate to the inner mode, perform the insertion on a temporary and
 640      truncate the result to the original destination.  Note that we can't
 641      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 642      X) 0)) is (reg:N X).  */
 643   if (GET_CODE (xop0) == SUBREG
 644       && REG_P (SUBREG_REG (xop0))
 645       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 646                                          op_mode))
 647     {
 648       rtx tem = gen_reg_rtx (op_mode);
 649       emit_move_insn (tem, xop0);
 650       xop0 = tem;
 651       copy_back = true;
 652     }
 653
 654   /* There is a similar overflow check at the start of store_bit_field_1,
 655      but it only handles the case where the field lies completely
 656      outside the register.  The field can also lie partially in the
 657      register, in which case BITSIZE must be adjusted for the partial
 658      overflow.  Without this, pr48335-2.c fails on big-endian targets
 659      that support a bit-insert instruction, such as arm and
 660      aarch64.  */
 661   if (bitsize + bitnum > unit && bitnum < unit)
 662     {
 663       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
 664                "destination object, data truncated into %wu-bit",
 665                bitsize, unit - bitnum);
 666       bitsize = unit - bitnum;
 667     }
 668
 669   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 670      "backwards" from the size of the unit we are inserting into.
 671      Otherwise, we count bits from the most significant on a
 672      BYTES/BITS_BIG_ENDIAN machine.  */
 673
 674   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 675     bitnum = unit - bitsize - bitnum;
 676
 677   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 678   value1 = value;
 679   if (value_mode != op_mode)
 680     {
 681       if (GET_MODE_BITSIZE (value_mode) >= bitsize)
 682         {
 683           rtx tmp;
 684           /* Optimization: Don't bother really extending VALUE
 685              if it has all the bits we will actually use.  However,
 686              if we must narrow it, be sure we do it correctly.  */
 687
 688           if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
 689             {
 690               tmp = simplify_subreg (op_mode, value1, value_mode, 0);
 691               if (! tmp)
 692                 tmp = simplify_gen_subreg (op_mode,
 693                                            force_reg (value_mode, value1),
 694                                            value_mode, 0);
 695             }
 696           else
 697             {
 698               tmp = gen_lowpart_if_possible (op_mode, value1);
 699               if (! tmp)
 700                 tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
 701             }
 702           value1 = tmp;
 703         }
 704       else if (CONST_INT_P (value))
 705         value1 = gen_int_mode (INTVAL (value), op_mode);
 706       else
 707         /* Parse phase is supposed to make VALUE's data type
 708            match that of the component reference, which is a type
 709            at least as wide as the field; so VALUE should have
 710            a mode that corresponds to that type.  */
 711         gcc_assert (CONSTANT_P (value));
 712     }
 713
     /* Operands of the insertion, in order: destination, field width in
        bits, field position, and the value to insert.  */
 714   create_fixed_operand (&ops[0], xop0);
 715   create_integer_operand (&ops[1], bitsize);
 716   create_integer_operand (&ops[2], bitnum);
 717   create_input_operand (&ops[3], value1, op_mode);
 718   if (maybe_expand_insn (insv->icode, 4, ops))
 719     {
     	  /* The insertion was done on a temporary; move the result into
     	     the original destination.  */
 720       if (copy_back)
 721         convert_move (op0, xop0, true);
 722       return true;
 723     }
     /* The pattern did not match: discard everything emitted above.  */
 724   delete_insns_since (last);
 725   return false;
 726 }
 727
 728 /* A subroutine of store_bit_field, with the same arguments.  Return true
 729    if the operation could be implemented.
 730
 731    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 732    no other way of implementing the operation.  If FALLBACK_P is false,
 733    return false instead.

        The strategies below are attempted in this order:
        - return true without emitting anything when the destination is a
          register and the field starts at or beyond the end of that register;
        - a vec_set pattern, for one whole element of a non-memory vector;
        - a plain move through a SUBREG, for a non-memory destination when
          BITSIZE equals the size of FIELDMODE;
        - a plain move to an adjusted address, when simple_mem_bitfield_p
          accepts the access;
        - otherwise BITSIZE and BITNUM are converted to constants, OP0 is
          punned to an integer mode (or to BLKmode for memory) and the work
          is handed to store_integral_bit_field.  */
 734
 735 static bool
 736 store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
 737                    poly_uint64 bitregion_start, poly_uint64 bitregion_end,
 738                    machine_mode fieldmode,
 739                    rtx value, bool reverse, bool fallback_p)
 740 {
 741   rtx op0 = str_rtx;
 742
       /* Strip any SUBREGs from the destination, adding each one's
          subreg_memory_offset (scaled to bits) to BITNUM.  */
 743   while (GET_CODE (op0) == SUBREG)
 744     {
 745       bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
 746       op0 = SUBREG_REG (op0);
 747     }
 748
 749   /* No action is needed if the target is a register and if the field
 750      lies completely outside that register.  This can occur if the source
 751      code contains an out-of-bounds access to a small array.  */
 752   if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
 753     return true;
 754
 755   /* Use vec_set patterns for inserting parts of vectors whenever
 756      available.  */
 757   machine_mode outermode = GET_MODE (op0);
 758   scalar_mode innermode = GET_MODE_INNER (outermode);
 759   poly_uint64 pos;
 760   if (VECTOR_MODE_P (outermode)
 761       && !MEM_P (op0)
 762       && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
 763       && fieldmode == innermode
 764       && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
 765       && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
 766     {
 767       class expand_operand ops[3];
 768       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 769
           /* Operands are: the vector, the new element, and the element
              index POS computed by multiple_p above.  If the expansion
              fails we simply fall through to the next strategy.  */
 770       create_fixed_operand (&ops[0], op0);
 771       create_input_operand (&ops[1], value, innermode);
 772       create_integer_operand (&ops[2], pos);
 773       if (maybe_expand_insn (icode, 3, ops))
 774         return true;
 775     }
 776
 777   /* If the target is a register, overwriting the entire object, or storing
 778      a full-word or multi-word field can be done with just a SUBREG.  */
 779   if (!MEM_P (op0)
 780       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
 781     {
 782       /* Use the subreg machinery either to narrow OP0 to the required
 783          words or to cope with mode punning between equal-sized modes.
 784          In the latter case, use subreg on the rhs side, not lhs.  */
 785       rtx sub;
 786       HOST_WIDE_INT regnum;
 787       poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
 788       if (known_eq (bitnum, 0U)
 789           && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
 790         {
               /* Whole-object overwrite: reinterpret VALUE in OP0's mode.  */
 791           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 792           if (sub)
 793             {
 794               if (reverse)
 795                 sub = flip_storage_order (GET_MODE (op0), sub);
 796               emit_move_insn (op0, sub);
 797               return true;
 798             }
 799         }
 800       else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum)
 801                && multiple_p (bitsize, regsize * BITS_PER_UNIT))
 802         {
               /* The field starts on a REGSIZE boundary and covers a whole
                  number of REGSIZE units: take a FIELDMODE subreg of OP0 at
                  byte offset REGNUM * REGSIZE and move VALUE into it.  */
 803           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 804                                      regnum * regsize);
 805           if (sub)
 806             {
 807               if (reverse)
 808                 value = flip_storage_order (fieldmode, value);
 809               emit_move_insn (sub, value);
 810               return true;
 811             }
 812         }
 813     }
 814
 815   /* If the target is memory, storing any naturally aligned field can be
 816      done with a simple store.  For targets that support fast unaligned
 817      memory, any naturally sized, unit aligned field can be done directly.  */
 818   poly_uint64 bytenum;
 819   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
 820     {
 821       op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
 822       if (reverse)
 823         value = flip_storage_order (fieldmode, value);
 824       emit_move_insn (op0, value);
 825       return true;
 826     }
 827
 828   /* It's possible we'll need to handle other cases here for
 829      polynomial bitnum and bitsize.  */
 830
 831   /* From here on we need to be looking at a fixed-size insertion.  */
 832   unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
 833   unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
 834
 835   /* Make sure we are playing with integral modes.  Pun with subregs
 836      if we aren't.  This must come after the entire register case above,
 837      since that case is valid for any mode.  The following cases are only
 838      valid for integral modes.  */
 839   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
 840   scalar_int_mode imode;
 841   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
 842     {
 843       if (MEM_P (op0))
             /* Re-type the MEM as the integer mode, or as BLKmode when no
                such mode exists, keeping offset 0 and the original size.  */
 844         op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
 845                                             0, MEM_SIZE (op0));
 846       else if (!op0_mode.exists ())
 847         {
               /* Non-memory OP0 with no integer mode of its size.  A full
                  overwrite from a MEM value can still be done as one move
                  after re-typing VALUE to OP0's mode.  */
 848           if (ibitnum == 0
 849               && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
 850               && MEM_P (value)
 851               && !reverse)
 852             {
 853               value = adjust_address (value, GET_MODE (op0), 0);
 854               emit_move_insn (op0, value);
 855               return true;
 856             }
 857           if (!fallback_p)
 858             return false;
               /* Copy OP0 to a stack temporary, insert the field there and
                  copy the temporary back.  Both bit-region bounds are passed
                  as 0, and the result of the recursive call is not
                  checked.  */
 859           rtx temp = assign_stack_temp (GET_MODE (op0),
 860                                         GET_MODE_SIZE (GET_MODE (op0)));
 861           emit_move_insn (temp, op0);
 862           store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
 863                              reverse, fallback_p);
 864           emit_move_insn (op0, temp);
 865           return true;
 866         }
 867       else
             /* Non-memory OP0 that has a same-sized integer mode: pun it.  */
 868         op0 = gen_lowpart (op0_mode.require (), op0);
 869     }
 870
 871   return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
 872                                    bitregion_start, bitregion_end,
 873                                    fieldmode, value, reverse, fallback_p);
 874 }
 875
 876 /* Subroutine of store_bit_field_1, with the same arguments, except
 877    that BITSIZE and BITNUM are constant.  Handle cases specific to
 878    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
 879    otherwise OP0 is a BLKmode MEM.

        Return true if the store was emitted.  Return false only when
        FALLBACK_P is false and none of the cheaper strategies applied, or
        when the recursive store of one word of a multi-word field failed.

        The strategies are attempted in this order: a movstrict pattern,
        word-by-word storage of fields wider than a word, an insv pattern on
        a register, an insv pattern on memory, insertion into a register
        copy of part of the memory, and finally store_fixed_bit_field (or
        store_split_bit_field when a multi-word register field straddles
        two words).  */
 880
 881 static bool
 882 store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
 883                           unsigned HOST_WIDE_INT bitsize,
 884                           unsigned HOST_WIDE_INT bitnum,
 885                           poly_uint64 bitregion_start,
 886                           poly_uint64 bitregion_end,
 887                           machine_mode fieldmode,
 888                           rtx value, bool reverse, bool fallback_p)
 889 {
 890   /* Storing an lsb-aligned field in a register
 891      can be done with a movstrict instruction.  */
 892
 893   if (!MEM_P (op0)
 894       && !reverse
 895       && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
 896       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
 897       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 898     {
 899       class expand_operand ops[2];
 900       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 901       rtx arg0 = op0;
 902       unsigned HOST_WIDE_INT subreg_off;
 903
 904       if (GET_CODE (arg0) == SUBREG)
 905         {
 906           /* Else we've got some float mode source being extracted into
 907              a different float mode destination -- this combination of
 908              subregs results in Severe Tire Damage.  */
 909           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 910                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 911                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 912           arg0 = SUBREG_REG (arg0);
 913         }
 914
           /* Build a FIELDMODE subreg of ARG0 at the byte offset of the
              field, but only if validate_subreg accepts it; otherwise fall
              through to the later strategies.  */
 915       subreg_off = bitnum / BITS_PER_UNIT;
 916       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 917         {
 918           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 919
 920           create_fixed_operand (&ops[0], arg0);
 921           /* Shrink the source operand to FIELDMODE.  */
 922           create_convert_operand_to (&ops[1], value, fieldmode, false);
 923           if (maybe_expand_insn (icode, 2, ops))
 924             return true;
 925         }
 926     }
 927
 928   /* Handle fields bigger than a word.  */
 929
 930   if (bitsize > BITS_PER_WORD)
 931     {
 932       /* Here we transfer the words of the field
 933          in the order least significant first.
 934          This is because the most significant word is the one which may
 935          be less than full.
 936          However, only do that if the value is not BLKmode.  */
 937
 938       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 939       const int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 940       rtx_insn *last;
 941
 942       /* This is the mode we must force value to, so that there will be enough
 943          subwords to extract.  Note that fieldmode will often (always?) be
 944          VOIDmode, because that is what store_field uses to indicate that this
 945          is a bit field, but passing VOIDmode to operand_subword_force
 946          is not allowed.
 947
 948          The mode must be fixed-size, since insertions into variable-sized
 949          objects are meant to be handled before calling this function.  */
 950       fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
 951       if (value_mode == VOIDmode)
 952         value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
 953
           /* Remember the current last insn so that a failure part-way
              through the loop can delete what the earlier iterations
              emitted.  */
 954       last = get_last_insn ();
 955       for (int i = 0; i < nwords; i++)
 956         {
 957           /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD
 958              except maybe for the last iteration.  */
 959           const unsigned HOST_WIDE_INT new_bitsize
 960             = MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 961           /* Bit offset from the starting bit number in the target.  */
 962           const unsigned int bit_offset
 963             = backwards ^ reverse
 964               ? MAX ((int) bitsize - (i + 1) * BITS_PER_WORD, 0)
 965               : i * BITS_PER_WORD;
 966           /* Starting word number in the value.  */
 967           const unsigned int wordnum
 968             = backwards
 969               ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD - (i + 1)
 970               : i;
 971           /* The chunk of the value in word_mode.  We use bit-field extraction
 972               in BLKmode to handle unaligned memory references and to shift the
 973               last chunk right on big-endian machines if need be.  */
 974           rtx value_word
 975             = fieldmode == BLKmode
 976               ? extract_bit_field (value, new_bitsize, wordnum * BITS_PER_WORD,
 977                                    1, NULL_RTX, word_mode, word_mode, false,
 978                                    NULL)
 979               : operand_subword_force (value, wordnum, value_mode);
 980
               /* Store this word through the general entry point, with
                  word_mode as the field mode.  */
 981           if (!store_bit_field_1 (op0, new_bitsize,
 982                                   bitnum + bit_offset,
 983                                   bitregion_start, bitregion_end,
 984                                   word_mode,
 985                                   value_word, reverse, fallback_p))
 986             {
 987               delete_insns_since (last);
 988               return false;
 989             }
 990         }
 991       return true;
 992     }
 993
 994   /* If VALUE has a floating-point or complex mode, access it as an
 995      integer of the corresponding size.  This can occur on a machine
 996      with 64 bit registers that uses SFmode for float.  It can also
 997      occur for unaligned float or complex fields.  */
       /* ORIG_VALUE keeps the value as passed in; it is what the
          register-copy strategy further down hands to store_bit_field_1.  */
 998   rtx orig_value = value;
 999   scalar_int_mode value_mode;
1000   if (GET_MODE (value) == VOIDmode)
1001     /* By this point we've dealt with values that are bigger than a word,
1002        so word_mode is a conservatively correct choice.  */
1003     value_mode = word_mode;
1004   else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
1005     {
           /* Allocate a fresh integer-mode pseudo and move ORIG_VALUE into
              its lowpart viewed in the original mode.  */
1006       value_mode = int_mode_for_mode (GET_MODE (value)).require ();
1007       value = gen_reg_rtx (value_mode);
1008       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
1009     }
1010
1011   /* If OP0 is a multi-word register, narrow it to the affected word.
1012      If the region spans two words, defer to store_split_bit_field.
1013      Don't do this if op0 is a single hard register wider than word
1014      such as a float or vector register.  */
1015   if (!MEM_P (op0)
1016       && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
1017       && (!REG_P (op0)
1018           || !HARD_REGISTER_P (op0)
1019           || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1020     {
1021       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1022         {
1023           if (!fallback_p)
1024             return false;
1025
1026           store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1027                                  bitregion_start, bitregion_end,
1028                                  value, value_mode, reverse);
1029           return true;
1030         }
           /* The field lies within one word: select that word of OP0 and
              make BITNUM relative to it.  */
1031       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1032                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1033       gcc_assert (op0);
1034       op0_mode = word_mode;
1035       bitnum %= BITS_PER_WORD;
1036     }
1037
1038   /* From here on we can assume that the field to be stored in fits
1039      within a word.  If the destination is a register, it too fits
1040      in a word.  */
1041
       /* Try an insv pattern for a non-memory destination.  */
1042   extraction_insn insv;
1043   if (!MEM_P (op0)
1044       && !reverse
1045       && get_best_reg_extraction_insn (&insv, EP_insv,
1046                                        GET_MODE_BITSIZE (op0_mode.require ()),
1047                                        fieldmode)
1048       && store_bit_field_using_insv (&insv, op0, op0_mode,
1049                                      bitsize, bitnum, value, value_mode))
1050     return true;
1051
1052   /* If OP0 is a memory, try copying it to a register and seeing if a
1053      cheap register alternative is available.  */
1054   if (MEM_P (op0) && !reverse)
1055     {
           /* First see whether an insv pattern can operate on the memory
              directly.  */
1056       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1057                                         fieldmode)
1058           && store_bit_field_using_insv (&insv, op0, op0_mode,
1059                                          bitsize, bitnum, value, value_mode))
1060         return true;
1061
1062       rtx_insn *last = get_last_insn ();
1063
1064       /* Try loading part of OP0 into a register, inserting the bitfield
1065          into that, and then copying the result back to OP0.  */
1066       unsigned HOST_WIDE_INT bitpos;
1067       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1068                                                bitregion_start, bitregion_end,
1069                                                fieldmode, &bitpos);
1070       if (xop0)
1071         {
1072           rtx tempreg = copy_to_reg (xop0);
               /* The recursive call is made with ORIG_VALUE and with
                  FALLBACK_P false.  On failure the load into TEMPREG is
                  deleted as well, since LAST was recorded before it.  */
1073           if (store_bit_field_1 (tempreg, bitsize, bitpos,
1074                                  bitregion_start, bitregion_end,
1075                                  fieldmode, orig_value, reverse, false))
1076             {
1077               emit_move_insn (xop0, tempreg);
1078               return true;
1079             }
1080           delete_insns_since (last);
1081         }
1082     }
1083
1084   if (!fallback_p)
1085     return false;
1086
       /* Last resort: store_fixed_bit_field, which cannot report failure
          (it returns void).  */
1087   store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1088                          bitregion_end, value, value_mode, reverse);
1089   return true;
1090 }
1091
1092 /* Generate code to store value from rtx VALUE
1093    into a bit-field within structure STR_RTX
1094    containing BITSIZE bits starting at bit BITNUM.
1095
1096    BITREGION_START is bitpos of the first bitfield in this region.
1097    BITREGION_END is the bitpos of the ending bitfield in this region.
1098    These two fields are 0, if the C++ memory model does not apply,
1099    or we are not interested in keeping track of bitfield regions.
1100
1101    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1102
1103    If REVERSE is true, the store is to be done in reverse order.

        The function returns nothing: store_bit_field_1 is always called
        with FALLBACK_P true, and a false result from it is treated as
        unreachable.  */
1104
1105 void
1106 store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1107                  poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1108                  machine_mode fieldmode,
1109                  rtx value, bool reverse)
1110 {
1111   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
       /* This path is only considered when BITSIZE and BITNUM are constants
          and FIELDMODE is a scalar integer mode.  */
1112   unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1113   scalar_int_mode int_mode;
1114   if (bitsize.is_constant (&ibitsize)
1115       && bitnum.is_constant (&ibitnum)
1116       && is_a <scalar_int_mode> (fieldmode, &int_mode)
1117       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1118                                      bitregion_start, bitregion_end))
1119     {
1120       /* Storing of a full word can be done with a simple store.
1121          We know here that the field can be accessed with one single
1122          instruction.  For targets that support unaligned memory,
1123          an unaligned access may be necessary.  */
1124       if (ibitsize == GET_MODE_BITSIZE (int_mode))
1125         {
1126           str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1127                                              ibitnum / BITS_PER_UNIT);
1128           if (reverse)
1129             value = flip_storage_order (int_mode, value);
1130           gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1131           emit_move_insn (str_rtx, value);
1132         }
1133       else
1134         {
1135           rtx temp;
1136
               /* Narrow STR_RTX to the INT_MODE unit holding the field
                  (IBITNUM becomes relative to that unit), load the unit
                  into a register, insert the field there and write the
                  whole unit back with one move.  */
1137           str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1138                                           ibitnum, &ibitnum);
1139           gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1140           temp = copy_to_reg (str_rtx);
1141           if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1142                                   int_mode, value, reverse, true))
1143             gcc_unreachable ();
1144
1145           emit_move_insn (str_rtx, temp);
1146         }
1147
1148       return;
1149     }
1150
1151   /* Under the C++0x memory model, we must not touch bits outside the
1152      bit region.  Adjust the address to start at the beginning of the
1153      bit region.  */
1154   if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1155     {
1156       scalar_int_mode best_mode;
1157       machine_mode addr_mode = VOIDmode;
1158
           /* OFFSET is the region start in bytes.  BITNUM and
              BITREGION_END are rebased so that the region starts at bit 0,
              and SIZE is the number of bytes from the region start to the
              end of the field, rounded up.  */
1159       poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1160       bitnum -= bitregion_start;
1161       poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1162       bitregion_end -= bitregion_start;
1163       bitregion_start = 0;
           /* ADDR_MODE stays VOIDmode unless BITSIZE and BITNUM are
              constants and get_best_mode finds a mode for the access.  */
1164       if (bitsize.is_constant (&ibitsize)
1165           && bitnum.is_constant (&ibitnum)
1166           && get_best_mode (ibitsize, ibitnum,
1167                             bitregion_start, bitregion_end,
1168                             MEM_ALIGN (str_rtx), INT_MAX,
1169                             MEM_VOLATILE_P (str_rtx), &best_mode))
1170         addr_mode = best_mode;
1171       str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1172                                               offset, size);
1173     }
1174
1175   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1176                           bitregion_start, bitregion_end,
1177                           fieldmode, value, reverse, true))
1178     gcc_unreachable ();
1179 }
1180 \f
1181 /* Use shifts and boolean operations to store VALUE into a bit field of
1182    width BITSIZE in OP0, starting at bit BITNUM.  If OP0_MODE is defined,
1183    it is the mode of OP0, otherwise OP0 is a BLKmode MEM.  VALUE_MODE is
1184    the mode of VALUE.
1185
1186    If REVERSE is true, the store is to be done in reverse order.  */
1187
1188 static void
1189 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1190                        unsigned HOST_WIDE_INT bitsize,
1191                        unsigned HOST_WIDE_INT bitnum,
1192                        poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1193                        rtx value, scalar_int_mode value_mode, bool reverse)
1194 {
1195   /* There is a case not handled here:
1196      a structure with a known alignment of just a halfword
1197      and a field split across two aligned halfwords within the structure.
1198      Or likewise a structure with a known alignment of just a byte
1199      and a field split across two bytes.
1200      Such cases are not supposed to be able to occur.  */
1201
1202   scalar_int_mode best_mode;
1203   if (MEM_P (op0))
1204     {
1205       unsigned int max_bitsize = BITS_PER_WORD;
1206       scalar_int_mode imode;
1207       if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1208         max_bitsize = GET_MODE_BITSIZE (imode);
1209
1210       if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1211                           MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1212                           &best_mode))
1213         {
1214           /* The only way this should occur is if the field spans word
1215              boundaries.  */
1216           store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1217                                  bitregion_start, bitregion_end,
1218                                  value, value_mode, reverse);
1219           return;
1220         }
1221
1222       op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1223     }
1224   else
1225     best_mode = op0_mode.require ();
1226
1227   store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1228                            value, value_mode, reverse);
1229 }
1230
1231 /* Helper function for store_fixed_bit_field, stores
1232    the bit field always using MODE, which is the mode of OP0.  The other
1233    arguments are as for store_fixed_bit_field.

        The store is built in three steps: VALUE is positioned at the
        field's bit offset within MODE, the field's bits are cleared in a
        register copy of OP0, and the two are combined with an inclusive OR
        before being moved back to OP0.  The clearing step is skipped for a
        constant VALUE whose field bits are all ones, and the OR step for
        one whose field bits are all zeros.  */
1234
1235 static void
1236 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1237                          unsigned HOST_WIDE_INT bitsize,
1238                          unsigned HOST_WIDE_INT bitnum,
1239                          rtx value, scalar_int_mode value_mode, bool reverse)
1240 {
1241   rtx temp;
       /* Set only for a CONST_INT VALUE: whether its low BITSIZE bits are
          all zeros or all ones.  */
1242   int all_zero = 0;
1243   int all_one = 0;
1244
1245   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1246      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1247
1248   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1249     /* BITNUM is the distance between our msb
1250        and that of the containing datum.
1251        Convert it to the distance from the lsb.  */
1252     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1253
1254   /* Now BITNUM is always the distance between our lsb
1255      and that of OP0.  */
1256
1257   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1258      we must first convert its mode to MODE.  */
1259
1260   if (CONST_INT_P (value))
1261     {
1262       unsigned HOST_WIDE_INT v = UINTVAL (value);
1263
           /* Truncate the constant to BITSIZE bits.  The shift is guarded
              because BITSIZE may equal HOST_BITS_PER_WIDE_INT.  */
1264       if (bitsize < HOST_BITS_PER_WIDE_INT)
1265         v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1266
1267       if (v == 0)
1268         all_zero = 1;
1269       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1270                 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1271                || (bitsize == HOST_BITS_PER_WIDE_INT
1272                    && v == HOST_WIDE_INT_M1U))
1273         all_one = 1;
1274
1275       value = lshift_value (mode, v, bitnum);
1276     }
1277   else
1278     {
           /* Masking is skipped when VALUE_MODE is exactly BITSIZE bits
              wide, or when the field ends at the top bit of MODE.
              NOTE(review): in the latter case the left shift below
              presumably discards the excess high bits -- confirm.  */
1279       int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1280                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1281
1282       if (value_mode != mode)
1283         value = convert_to_mode (mode, value, 1);
1284
1285       if (must_and)
1286         value = expand_binop (mode, and_optab, value,
1287                               mask_rtx (mode, 0, bitsize, 0),
1288                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1289       if (bitnum > 0)
1290         value = expand_shift (LSHIFT_EXPR, mode, value,
1291                               bitnum, NULL_RTX, 1);
1292     }
1293
1294   if (reverse)
1295     value = flip_storage_order (mode, value);
1296
1297   /* Now clear the chosen bits in OP0,
1298      except that if VALUE is -1 we need not bother.  */
1299   /* We keep the intermediates in registers to allow CSE to combine
1300      consecutive bitfield assignments.  */
1301
1302   temp = force_reg (mode, op0);
1303
1304   if (! all_one)
1305     {
           /* The mask is built for the field at BITNUM with the final
              argument 1, and is flipped the same way as VALUE when
              REVERSE is set.  */
1306       rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1307       if (reverse)
1308         mask = flip_storage_order (mode, mask);
1309       temp = expand_binop (mode, and_optab, temp, mask,
1310                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1311       temp = force_reg (mode, temp);
1312     }
1313
1314   /* Now logical-or VALUE into OP0, unless it is zero.  */
1315
1316   if (! all_zero)
1317     {
1318       temp = expand_binop (mode, ior_optab, temp, value,
1319                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1320       temp = force_reg (mode, temp);
1321     }
1322
       /* Write the result back unless TEMP is OP0 itself; the move targets
          a copy_rtx of OP0.  */
1323   if (op0 != temp)
1324     {
1325       op0 = copy_rtx (op0);
1326       emit_move_insn (op0, temp);
1327     }
1328 }
1329 \f
1330 /* Store a bit field that is split across multiple accessible memory objects.
1331
1332    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1333    BITSIZE is the field width; BITPOS the position of its first bit
1334    (within the word).
1335    VALUE is the value to store, which has mode VALUE_MODE.
1336    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1337    a BLKmode MEM.
        BITREGION_START and BITREGION_END are passed on to
        store_fixed_bit_field; a nonzero BITREGION_END also limits how far
        a memory access may extend (see the loop below).
1338
1339    If REVERSE is true, the store is to be done in reverse order.
1340
1341    This does not yet handle fields wider than BITS_PER_WORD.  */
1342
1343 static void
1344 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1345                        unsigned HOST_WIDE_INT bitsize,
1346                        unsigned HOST_WIDE_INT bitpos,
1347                        poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1348                        rtx value, scalar_int_mode value_mode, bool reverse)
1349 {
       /* UNIT is the width in bits of the chunks OP0 is accessed in,
          TOTAL_BITS the width of VALUE's mode, and BITSDONE the number of
          field bits stored so far.  */
1350   unsigned int unit, total_bits, bitsdone = 0;
1351
1352   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1353      much at a time.  */
1354   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1355     unit = BITS_PER_WORD;
1356   else
1357     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1358
1359   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1360      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1361      again, and we will mutually recurse forever.  */
1362   if (MEM_P (op0) && op0_mode.exists ())
1363     unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1364
1365   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1366      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1367      that VALUE might be a floating-point constant.  */
1368   if (CONSTANT_P (value) && !CONST_INT_P (value))
1369     {
1370       rtx word = gen_lowpart_common (word_mode, value);
1371
1372       if (word && (value != word))
1373         value = word;
1374       else
1375         value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1376       value_mode = word_mode;
1377     }
1378
1379   total_bits = GET_MODE_BITSIZE (value_mode);
1380
       /* Each completed iteration stores THISSIZE bits: the part of the
          remaining field that fits in the current UNIT-bit chunk of OP0.  */
1381   while (bitsdone < bitsize)
1382     {
1383       unsigned HOST_WIDE_INT thissize;
1384       unsigned HOST_WIDE_INT thispos;
1385       unsigned HOST_WIDE_INT offset;
1386       rtx part;
1387
           /* OFFSET is the index of the chunk holding the next bit to
              store and THISPOS that bit's position within the chunk.  */
1388       offset = (bitpos + bitsdone) / unit;
1389       thispos = (bitpos + bitsdone) % unit;
1390
1391       /* When region of bytes we can touch is restricted, decrease
1392          UNIT close to the end of the region as needed.  If op0 is a REG
1393          or SUBREG of REG, don't do this, as there can't be data races
1394          on a register and we can expand shorter code in some cases.  */
1395       if (maybe_ne (bitregion_end, 0U)
1396           && unit > BITS_PER_UNIT
1397           && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1398           && !REG_P (op0)
1399           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1400         {
               /* Halve UNIT and restart the iteration so that OFFSET and
                  THISPOS are recomputed; no bits are stored this time.  */
1401           unit = unit / 2;
1402           continue;
1403         }
1404
1405       /* THISSIZE must not overrun a word boundary.  Otherwise,
1406          store_fixed_bit_field will call us again, and we will mutually
1407          recurse forever.  */
1408       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1409       thissize = MIN (thissize, unit - thispos);
1410
           /* Extract into PART the THISSIZE bits of VALUE that belong in
              this chunk.  */
1411       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1412         {
1413           /* Fetch successively less significant portions.  */
1414           if (CONST_INT_P (value))
1415             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1416                              >> (bitsize - bitsdone - thissize))
1417                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1418           /* Likewise, but the source is little-endian.  */
1419           else if (reverse)
1420             part = extract_fixed_bit_field (word_mode, value, value_mode,
1421                                             thissize,
1422                                             bitsize - bitsdone - thissize,
1423                                             NULL_RTX, 1, false);
1424           else
1425             /* The args are chosen so that the last part includes the
1426                lsb.  Give extract_bit_field the value it needs (with
1427                endianness compensation) to fetch the piece we want.  */
1428             part = extract_fixed_bit_field (word_mode, value, value_mode,
1429                                             thissize,
1430                                             total_bits - bitsize + bitsdone,
1431                                             NULL_RTX, 1, false);
1432         }
1433       else
1434         {
1435           /* Fetch successively more significant portions.  */
1436           if (CONST_INT_P (value))
1437             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1438                              >> bitsdone)
1439                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1440           /* Likewise, but the source is big-endian.  */
1441           else if (reverse)
1442             part = extract_fixed_bit_field (word_mode, value, value_mode,
1443                                             thissize,
1444                                             total_bits - bitsdone - thissize,
1445                                             NULL_RTX, 1, false);
1446           else
1447             part = extract_fixed_bit_field (word_mode, value, value_mode,
1448                                             thissize, bitsdone, NULL_RTX,
1449                                             1, false);
1450         }
1451
1452       /* If OP0 is a register, then handle OFFSET here.  */
1453       rtx op0_piece = op0;
1454       opt_scalar_int_mode op0_piece_mode = op0_mode;
1455       if (SUBREG_P (op0) || REG_P (op0))
1456         {
1457           scalar_int_mode imode;
1458           if (op0_mode.exists (&imode)
1459               && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1460             {
                   /* OP0 is narrower than a word, so a nonzero OFFSET lies
                      outside it; const0_rtx marks the piece to be skipped
                      below.  */
1461               if (offset)
1462                 op0_piece = const0_rtx;
1463             }
1464           else
1465             {
                   /* Select the word of OP0 that contains this chunk.  */
1466               op0_piece = operand_subword_force (op0,
1467                                                  offset * unit / BITS_PER_WORD,
1468                                                  GET_MODE (op0));
1469               op0_piece_mode = word_mode;
1470             }
               /* Mask OFFSET with BITS_PER_WORD / UNIT - 1, leaving the
                  chunk index within the selected word.  */
1471           offset &= BITS_PER_WORD / unit - 1;
1472         }
1473
1474       /* OFFSET is in UNITs, and UNIT is in bits.  If OP0_PIECE is const0_rtx,
1475          it is just an out-of-bounds access.  Ignore it.  */
1476       if (op0_piece != const0_rtx)
1477         store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1478                                offset * unit + thispos, bitregion_start,
1479                                bitregion_end, part, word_mode, reverse);
1480       bitsdone += thissize;
1481     }
1482 }
1483 \f
1484 /* Helper for extract_bit_field_1: coerce the extracted value X into a
1485    mode the caller accepts, namely MODE or TMODE.  MODE, TMODE and
1486    UNSIGNEDP are the arguments that were given to extract_bit_field.  */
1487
1488 static rtx
1489 convert_extracted_bit_field (rtx x, machine_mode mode,
1490                              machine_mode tmode, bool unsignedp)
1491 {
1492   machine_mode x_mode = GET_MODE (x);
1493   if (x_mode == tmode || x_mode == mode)
1494     return x;
1495
1496   /* A scalar integer TMODE is reached with an ordinary conversion.  */
1497   if (SCALAR_INT_MODE_P (tmode))
1498     return convert_to_mode (tmode, x, unsignedp);
1499
1500   /* Otherwise convert X to the integer mode with TMODE's size, put the
1501      result in a register and reinterpret that register as TMODE by
1502      taking its lowpart.  */
1503   scalar_int_mode punned_mode = int_mode_for_mode (tmode).require ();
1504   rtx punned = convert_to_mode (punned_mode, x, unsignedp);
1505   punned = force_reg (punned_mode, punned);
1506   return gen_lowpart (tmode, punned);
1507 }
1508
1509 /* Try to use an ext(z)v pattern to extract a field from OP0.
1510    Return the extracted value on success, otherwise return null.
1511    EXTV describes the extraction instruction to use.  If OP0_MODE
1512    is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1513    The other arguments are as for extract_bit_field.
        Null is returned when BITSIZE is zero or exceeds the width of EXTV's
        field mode, when OP0 is a SUBREG whose mode is not that field mode,
        and when the extraction instruction cannot be expanded.  */
1514
1515 static rtx
1516 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1517                               opt_scalar_int_mode op0_mode,
1518                               unsigned HOST_WIDE_INT bitsize,
1519                               unsigned HOST_WIDE_INT bitnum,
1520                               int unsignedp, rtx target,
1521                               machine_mode mode, machine_mode tmode)
1522 {
1523   class expand_operand ops[4];
       /* SPEC_TARGET is the target as the caller will see it and
          SPEC_TARGET_SUBREG, when set, is a lowpart view of it in EXT_MODE.
          Both are compared with the instruction's output operand below.  */
1524   rtx spec_target = target;
1525   rtx spec_target_subreg = 0;
1526   scalar_int_mode ext_mode = extv->field_mode;
       /* Width in bits of the mode the instruction extracts into.  */
1527   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1528
1529   if (bitsize == 0 || unit < bitsize)
1530     return NULL_RTX;
1531
1532   if (MEM_P (op0))
1533     /* Get a reference to the first byte of the field.  */
1534     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1535                                 &bitnum);
1536   else
1537     {
1538       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1539       if (BYTES_BIG_ENDIAN)
1540         bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1541
1542       /* If op0 is a register, we need it in EXT_MODE to make it
1543          acceptable to the format of ext(z)v.  */
1544       if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1545         return NULL_RTX;
1546       if (REG_P (op0) && op0_mode.require () != ext_mode)
1547         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1548     }
1549
1550   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1551      "backwards" from the size of the unit we are extracting from.
1552      Otherwise, we count bits from the most significant on a
1553      BYTES/BITS_BIG_ENDIAN machine.  */
1554
1555   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1556     bitnum = unit - bitsize - bitnum;
1557
       /* With no target supplied, extract into a fresh pseudo of TMODE.  */
1558   if (target == 0)
1559     target = spec_target = gen_reg_rtx (tmode);
1560
1561   if (GET_MODE (target) != ext_mode)
1562     {
1563       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1564          between the mode of the extraction (word_mode) and the target
1565          mode.  Instead, create a temporary and use convert_move to set
1566          the target.  */
1567       if (REG_P (target)
1568           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1569         {
1570           target = gen_lowpart (ext_mode, target);
1571           if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1572             spec_target_subreg = target;
1573         }
1574       else
1575         target = gen_reg_rtx (ext_mode);
1576     }
1577
       /* Operands of the pattern: destination, source, field width and
          field position.  */
1578   create_output_operand (&ops[0], target, ext_mode);
1579   create_fixed_operand (&ops[1], op0);
1580   create_integer_operand (&ops[2], bitsize);
1581   create_integer_operand (&ops[3], bitnum);
1582   if (maybe_expand_insn (extv->icode, 4, ops))
1583     {
           /* Read back the output operand and work out how it relates to
              the target the caller will see.  */
1584       target = ops[0].value;
1585       if (target == spec_target)
1586         return target;
1587       if (target == spec_target_subreg)
1588         return spec_target;
1589       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1590     }
1591   return NULL_RTX;
1592 }
1593
1594 /* Check whether the BITSIZE bits of OP0 that start at BITNUM can be
1595    read as a value of mode MODE simply by taking a subreg of OP0.
1596    Return that subreg when this is valid and null when it is not.  */
1597
1598 static rtx
1599 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1600                              poly_uint64 bitsize, poly_uint64 bitnum)
1601 {
1602   machine_mode inner_mode = GET_MODE (op0);
1603   poly_uint64 byte;
1604   bool ok = (multiple_p (bitnum, BITS_PER_UNIT, &byte)
1605              && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1606              && lowpart_bit_field_p (bitnum, bitsize, inner_mode)
1607              && TRULY_NOOP_TRUNCATION_MODES_P (mode, inner_mode));
1608   return ok ? simplify_gen_subreg (mode, op0, inner_mode, byte) : NULL_RTX;
1609 }
1610
1611 /* A subroutine of extract_bit_field, with the same arguments.
1612    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1613    if we can find no other means of implementing the operation.
1614    If FALLBACK_P is false, return NULL instead.
        Strategies are attempted in this order: returning a whole register
        unchanged, vec_extract patterns, a plain subreg, a plain memory
        load, and finally extract_integral_bit_field.  ALT_RTL, if nonnull,
        is written only on the vec_extract paths.  */
1615
1616 static rtx
1617 extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1618                      int unsignedp, rtx target, machine_mode mode,
1619                      machine_mode tmode, bool reverse, bool fallback_p,
1620                      rtx *alt_rtl)
1621 {
1622   rtx op0 = str_rtx;
1623   machine_mode mode1;
1624
1625   if (tmode == VOIDmode)
1626     tmode = mode;
1627
       /* Strip SUBREGs from STR_RTX, folding each SUBREG_BYTE into BITNUM.  */
1628   while (GET_CODE (op0) == SUBREG)
1629     {
1630       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1631       op0 = SUBREG_REG (op0);
1632     }
1633
1634   /* If we have an out-of-bounds access to a register, just return an
1635      uninitialized register of the required mode.  This can occur if the
1636      source code contains an out-of-bounds access to a small array.  */
1637   if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1638     return gen_reg_rtx (tmode);
1639
1640   if (REG_P (op0)
1641       && mode == GET_MODE (op0)
1642       && known_eq (bitnum, 0U)
1643       && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1644     {
1645       if (reverse)
1646         op0 = flip_storage_order (mode, op0);
1647       /* We're trying to extract a full register from itself.  */
1648       return op0;
1649     }
1650
1651   /* First try to check for vector from vector extractions.  */
1652   if (VECTOR_MODE_P (GET_MODE (op0))
1653       && !MEM_P (op0)
1654       && VECTOR_MODE_P (tmode)
1655       && known_eq (bitsize, GET_MODE_BITSIZE (tmode))
1656       && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1657     {
1658       machine_mode new_mode = GET_MODE (op0);
1659       if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1660         {
               /* Look for a vector mode of the same size as OP0 whose
                  elements have TMODE's element mode; give up on this
                  strategy (NEW_MODE = VOIDmode) if there is none.  */
1661           scalar_mode inner_mode = GET_MODE_INNER (tmode);
1662           poly_uint64 nunits;
1663           if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1664                            GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1665               || !related_vector_mode (tmode, inner_mode,
1666                                        nunits).exists (&new_mode)
1667               || maybe_ne (GET_MODE_SIZE (new_mode),
1668                            GET_MODE_SIZE (GET_MODE (op0))))
1669             new_mode = VOIDmode;
1670         }
1671       poly_uint64 pos;
1672       if (new_mode != VOIDmode
1673           && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1674               != CODE_FOR_nothing)
1675           && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1676         {
1677           class expand_operand ops[3];
1678           machine_mode outermode = new_mode;
1679           machine_mode innermode = tmode;
1680           enum insn_code icode
1681             = convert_optab_handler (vec_extract_optab, outermode, innermode);
1682
1683           if (new_mode != GET_MODE (op0))
1684             op0 = gen_lowpart (new_mode, op0);
1685           create_output_operand (&ops[0], target, innermode);
               /* Set the operand's target flag; it is tested again after
                  expansion before *ALT_RTL is written.  */
1686           ops[0].target = 1;
1687           create_input_operand (&ops[1], op0, outermode);
1688           create_integer_operand (&ops[2], pos);
1689           if (maybe_expand_insn (icode, 3, ops))
1690             {
1691               if (alt_rtl && ops[0].target)
1692                 *alt_rtl = target;
1693               target = ops[0].value;
1694               if (GET_MODE (target) != mode)
1695                 return gen_lowpart (tmode, target);
1696               return target;
1697             }
1698         }
1699     }
1700
1701   /* See if we can get a better vector mode before extracting.  */
1702   if (VECTOR_MODE_P (GET_MODE (op0))
1703       && !MEM_P (op0)
1704       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1705     {
1706       machine_mode new_mode;
1707
           /* Start the search at the first vector mode of the class that
              corresponds to TMODE's class.  */
1708       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1709         new_mode = MIN_MODE_VECTOR_FLOAT;
1710       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1711         new_mode = MIN_MODE_VECTOR_FRACT;
1712       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1713         new_mode = MIN_MODE_VECTOR_UFRACT;
1714       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1715         new_mode = MIN_MODE_VECTOR_ACCUM;
1716       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1717         new_mode = MIN_MODE_VECTOR_UACCUM;
1718       else
1719         new_mode = MIN_MODE_VECTOR_INT;
1720
           /* Stop at the first supported mode that is as large as OP0 and
              whose elements are as large as TMODE.  */
1721       FOR_EACH_MODE_FROM (new_mode, new_mode)
1722         if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1723             && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1724             && targetm.vector_mode_supported_p (new_mode))
1725           break;
1726       if (new_mode != VOIDmode)
1727         op0 = gen_lowpart (new_mode, op0);
1728     }
1729
1730   /* Use vec_extract patterns for extracting parts of vectors whenever
1731      available.  If that fails, see whether the current modes and bitregion
1732      give a natural subreg.  */
1733   machine_mode outermode = GET_MODE (op0);
1734   if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1735     {
1736       scalar_mode innermode = GET_MODE_INNER (outermode);
1737       enum insn_code icode
1738         = convert_optab_handler (vec_extract_optab, outermode, innermode);
1739       poly_uint64 pos;
1740       if (icode != CODE_FOR_nothing
1741           && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
1742           && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
1743         {
1744           class expand_operand ops[3];
1745
1746           create_output_operand (&ops[0], target, innermode);
1747           ops[0].target = 1;
1748           create_input_operand (&ops[1], op0, outermode);
1749           create_integer_operand (&ops[2], pos);
1750           if (maybe_expand_insn (icode, 3, ops))
1751             {
1752               if (alt_rtl && ops[0].target)
1753                 *alt_rtl = target;
1754               target = ops[0].value;
1755               if (GET_MODE (target) != mode)
1756                 return gen_lowpart (tmode, target);
1757               return target;
1758             }
1759         }
1760       /* Using subregs is useful if we're extracting one register vector
1761          from a multi-register vector.  extract_bit_field_as_subreg checks
1762          for valid bitsize and bitnum, so we don't need to do that here.  */
1763       if (VECTOR_MODE_P (mode))
1764         {
1765           rtx sub = extract_bit_field_as_subreg (mode, op0, bitsize, bitnum);
1766           if (sub)
1767             return sub;
1768         }
1769     }
1770
1771   /* Make sure we are playing with integral modes.  Pun with subregs
1772      if we aren't.  */
1773   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1774   scalar_int_mode imode;
1775   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1776     {
1777       if (MEM_P (op0))
1778         op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1779                                             0, MEM_SIZE (op0));
1780       else if (op0_mode.exists (&imode))
1781         {
1782           op0 = gen_lowpart (imode, op0);
1783
1784           /* If we got a SUBREG, force it into a register since we
1785              aren't going to be able to do another SUBREG on it.  */
1786           if (GET_CODE (op0) == SUBREG)
1787             op0 = force_reg (imode, op0);
1788         }
1789       else
1790         {
               /* OP0 is not in memory and has no integer mode of its size:
                  copy it to a stack temporary and treat that as a BLKmode
                  MEM.  */
1791           poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1792           rtx mem = assign_stack_temp (GET_MODE (op0), size);
1793           emit_move_insn (mem, op0);
1794           op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1795         }
1796     }
1797
1798   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1799      If that's wrong, the solution is to test for it and set TARGET to 0
1800      if needed.  */
1801
1802   /* Get the mode of the field to use for atomic access or subreg
1803      conversion.  */
1804   if (!SCALAR_INT_MODE_P (tmode)
1805       || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1806     mode1 = mode;
1807   gcc_assert (mode1 != BLKmode);
1808
1809   /* Extraction of a full MODE1 value can be done with a subreg as long
1810      as the least significant bit of the value is the least significant
1811      bit of either OP0 or a word of OP0.  */
1812   if (!MEM_P (op0) && !reverse)
1813     {
1814       rtx sub = extract_bit_field_as_subreg (mode1, op0, bitsize, bitnum);
1815       if (sub)
1816         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1817     }
1818
1819   /* Extraction of a full MODE1 value can be done with a load as long as
1820      the field is on a byte boundary and is sufficiently aligned.  */
1821   poly_uint64 bytenum;
1822   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1823     {
1824       op0 = adjust_bitfield_address (op0, mode1, bytenum);
1825       if (reverse)
1826         op0 = flip_storage_order (mode1, op0);
1827       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1828     }
1829
1830   /* If we have a memory source and a non-constant bit offset, restrict
1831      the memory to the referenced bytes.  This is a worst-case fallback
1832      but is useful for things like vector booleans.  */
1833   if (MEM_P (op0) && !bitnum.is_constant ())
1834     {
1835       bytenum = bits_to_bytes_round_down (bitnum);
1836       bitnum = num_trailing_bits (bitnum);
1837       poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1838       op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
           /* OP0 is now a BLKmode MEM, so it has no integer mode.  */
1839       op0_mode = opt_scalar_int_mode ();
1840     }
1841
1842   /* It's possible we'll need to handle other cases here for
1843      polynomial bitnum and bitsize.  */
1844
1845   /* From here on we need to be looking at a fixed-size extraction.  */
1846   return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1847                                      bitnum.to_constant (), unsignedp,
1848                                      target, mode, tmode, reverse, fallback_p);
1849 }
1850
1851 /* Subroutine of extract_bit_field_1, with the same arguments, except
1852    that BITSIZE and BITNUM are constant.  Handle cases specific to
1853    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
1854    otherwise OP0 is a BLKmode MEM.
        Fields wider than a word are assembled one word at a time.  Narrower
        fields are tried with ext(z)v patterns, then (for memory) through a
        register copy, and finally, if FALLBACK_P, with shifts and masks in
        extract_fixed_bit_field.  Return null if FALLBACK_P is false and
        none of the cheaper methods applied.  */
1855
1856 static rtx
1857 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1858                             unsigned HOST_WIDE_INT bitsize,
1859                             unsigned HOST_WIDE_INT bitnum, int unsignedp,
1860                             rtx target, machine_mode mode, machine_mode tmode,
1861                             bool reverse, bool fallback_p)
1862 {
1863   /* Handle fields bigger than a word.  */
1864
1865   if (bitsize > BITS_PER_WORD)
1866     {
1867       /* Here we transfer the words of the field
1868          in the order least significant first.
1869          This is because the most significant word is the one which may
1870          be less than full.  */
1871
1872       const bool backwards = WORDS_BIG_ENDIAN;
1873       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1874       unsigned int i;
1875       rtx_insn *last;
1876
1877       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1878         target = gen_reg_rtx (mode);
1879
1880       /* In case we're about to clobber a base register or something
1881          (see gcc.c-torture/execute/20040625-1.c).   */
1882       if (reg_mentioned_p (target, op0))
1883         target = gen_reg_rtx (mode);
1884
1885       /* Indicate for flow that the entire target reg is being set.  */
1886       emit_clobber (target);
1887
1888       /* The mode must be fixed-size, since extract_bit_field_1 handles
1889          extractions from variable-sized objects before calling this
1890          function.  */
1891       unsigned int target_size
1892         = GET_MODE_SIZE (GET_MODE (target)).to_constant ();
           /* Remember where we are so that the per-word insns can be
              discarded if one of the words cannot be extracted.  */
1893       last = get_last_insn ();
1894       for (i = 0; i < nwords; i++)
1895         {
1896           /* If I is 0, use the low-order word in both field and target;
1897              if I is 1, use the next to lowest word; and so on.  */
1898           /* Word number in TARGET to use.  */
1899           unsigned int wordnum
1900             = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1901           /* Offset from start of field in OP0.  */
1902           unsigned int bit_offset = (backwards ^ reverse
1903                                      ? MAX ((int) bitsize - ((int) i + 1)
1904                                             * BITS_PER_WORD,
1905                                             0)
1906                                      : (int) i * BITS_PER_WORD);
1907           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1908           rtx result_part
1909             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1910                                              bitsize - i * BITS_PER_WORD),
1911                                    bitnum + bit_offset, 1, target_part,
1912                                    mode, word_mode, reverse, fallback_p, NULL);
1913
1914           gcc_assert (target_part);
1915           if (!result_part)
1916             {
1917               delete_insns_since (last);
1918               return NULL;
1919             }
1920
1921           if (result_part != target_part)
1922             emit_move_insn (target_part, result_part);
1923         }
1924
1925       if (unsignedp)
1926         {
1927           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1928              need to be zero'd out.  */
1929           if (target_size > nwords * UNITS_PER_WORD)
1930             {
1931               unsigned int i, total_words;
1932
1933               total_words = target_size / UNITS_PER_WORD;
1934               for (i = nwords; i < total_words; i++)
1935                 emit_move_insn
1936                   (operand_subword (target,
1937                                     backwards ? total_words - i - 1 : i,
1938                                     1, VOIDmode),
1939                    const0_rtx);
1940             }
1941           return target;
1942         }
1943
1944       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1945       target = expand_shift (LSHIFT_EXPR, mode, target,
1946                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1947       return expand_shift (RSHIFT_EXPR, mode, target,
1948                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1949     }
1950
1951   /* If OP0 is a multi-word register, narrow it to the affected word.
1952      If the region spans two words, defer to extract_split_bit_field.  */
1953   if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1954     {
1955       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1956         {
1957           if (!fallback_p)
1958             return NULL_RTX;
1959           target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1960                                             unsignedp, reverse);
1961           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1962         }
1963       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1964                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1965       op0_mode = word_mode;
1966       bitnum %= BITS_PER_WORD;
1967     }
1968
1969   /* From here on we know the desired field is smaller than a word.
1970      If OP0 is a register, it too fits within a word.  */
1971   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1972   extraction_insn extv;
1973   if (!MEM_P (op0)
1974       && !reverse
1975       /* ??? We could limit the structure size to the part of OP0 that
1976          contains the field, with appropriate checks for endianness
1977          and TARGET_TRULY_NOOP_TRUNCATION.  */
1978       && get_best_reg_extraction_insn (&extv, pattern,
1979                                        GET_MODE_BITSIZE (op0_mode.require ()),
1980                                        tmode))
1981     {
1982       rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1983                                                  bitsize, bitnum,
1984                                                  unsignedp, target, mode,
1985                                                  tmode);
1986       if (result)
1987         return result;
1988     }
1989
1990   /* If OP0 is a memory, try copying it to a register and seeing if a
1991      cheap register alternative is available.  */
1992   if (MEM_P (op0) && !reverse)
1993     {
           /* First see whether an ext(z)v pattern can read the field
              straight from memory.  */
1994       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1995                                         tmode))
1996         {
1997           rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1998                                                      bitsize, bitnum,
1999                                                      unsignedp, target, mode,
2000                                                      tmode);
2001           if (result)
2002             return result;
2003         }
2004
2005       rtx_insn *last = get_last_insn ();
2006
2007       /* Try loading part of OP0 into a register and extracting the
2008          bitfield from that.  */
2009       unsigned HOST_WIDE_INT bitpos;
2010       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2011                                                0, 0, tmode, &bitpos);
2012       if (xop0)
2013         {
2014           xop0 = copy_to_reg (xop0);
               /* FALLBACK_P is false here so that a failed register attempt
                  returns null and control reaches the code below.  */
2015           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2016                                             unsignedp, target,
2017                                             mode, tmode, reverse, false, NULL);
2018           if (result)
2019             return result;
               /* The register attempt failed; discard the insns emitted
                  for it.  */
2020           delete_insns_since (last);
2021         }
2022     }
2023
2024   if (!fallback_p)
2025     return NULL;
2026
2027   /* Find a correspondingly-sized integer field, so we can apply
2028      shifts and masks to it.  */
2029   scalar_int_mode int_mode;
2030   if (!int_mode_for_mode (tmode).exists (&int_mode))
2031     /* If this fails, we should probably push op0 out to memory and then
2032        do a load.  */
2033     int_mode = int_mode_for_mode (mode).require ();
2034
2035   target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2036                                     bitnum, target, unsignedp, reverse);
2037
2038   /* Complex values must be reversed piecewise, so we need to undo the global
2039      reversal, convert to the complex mode and reverse again.  */
2040   if (reverse && COMPLEX_MODE_P (tmode))
2041     {
2042       target = flip_storage_order (int_mode, target);
2043       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2044       target = flip_storage_order (tmode, target);
2045     }
2046   else
2047     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2048
2049   return target;
2050 }
2051
2052 /* Generate code to extract a byte-field from STR_RTX
2053    containing BITSIZE bits, starting at BITNUM,
2054    and put it in TARGET if possible (if TARGET is nonzero).
2055    Regardless of TARGET, we return the rtx for where the value is placed.
2056
2057    STR_RTX is the structure containing the byte (a REG or MEM).
2058    UNSIGNEDP is nonzero if this is an unsigned bit field.
2059    MODE is the natural mode of the field value once extracted.
2060    TMODE is the mode the caller would like the value to have;
2061    but the value may be returned with type MODE instead.
2062
2063    If REVERSE is true, the extraction is to be done in reverse order.
2064
2065    If a TARGET is specified and we can store in it at no extra cost,
2066    we do so, and return TARGET.
2067    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2068    if they are equally easy.
2069
2070    If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2071    then *ALT_RTL is set to TARGET (before legitimization).  */
2072
2073 rtx
2074 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2075                    int unsignedp, rtx target, machine_mode mode,
2076                    machine_mode tmode, bool reverse, rtx *alt_rtl)
2077 {
2078   machine_mode mode1;
2079
2080   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
       /* MODE1 is the mode tested by strict_volatile_bitfield_p below: the
          mode of STR_RTX if its bit size may be nonzero, else the mode of
          TARGET under the same condition, else TMODE.  */
2081   if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2082     mode1 = GET_MODE (str_rtx);
2083   else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2084     mode1 = GET_MODE (target);
2085   else
2086     mode1 = tmode;
2087
       /* The strict-volatile path needs constant BITSIZE and BITNUM and a
          scalar integer MODE1.  */
2088   unsigned HOST_WIDE_INT ibitsize, ibitnum;
2089   scalar_int_mode int_mode;
2090   if (bitsize.is_constant (&ibitsize)
2091       && bitnum.is_constant (&ibitnum)
2092       && is_a <scalar_int_mode> (mode1, &int_mode)
2093       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2094                                      int_mode, 0, 0))
2095     {
2096       /* Extraction of a full INT_MODE value can be done with a simple load.
2097          We know here that the field can be accessed with one single
2098          instruction.  For targets that support unaligned memory,
2099          an unaligned access may be necessary.  */
2100       if (ibitsize == GET_MODE_BITSIZE (int_mode))
2101         {
2102           rtx result = adjust_bitfield_address (str_rtx, int_mode,
2103                                                 ibitnum / BITS_PER_UNIT);
2104           if (reverse)
2105             result = flip_storage_order (int_mode, result);
2106           gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2107           return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2108         }
2109
           /* The field is narrower than INT_MODE: narrow the reference to
              INT_MODE, copy it to a register and extract the field from
              that register.  */
2110       str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2111                                       &ibitnum);
2112       gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2113       str_rtx = copy_to_reg (str_rtx);
2114       return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2115                                   target, mode, tmode, reverse, true, alt_rtl);
2116     }
2117
       /* General case; fall back to shifts and masks if necessary.  */
2118   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2119                               target, mode, tmode, reverse, true, alt_rtl);
2120 }
2121 \f
2122 /* Extract the BITSIZE-bit field that starts at bit BITNUM of OP0 using
2123    shifts and boolean operations.  OP0_MODE, when defined, is the mode
2124    of OP0; when it is undefined, OP0 is a BLKmode MEM.
2125
2126    A nonzero UNSIGNEDP means the field is unsigned and must not be
2127    sign-extended.  A true REVERSE requests extraction in reverse order.
2128
2129    A nonzero TARGET is a suggestion for where to put the value; the
2130    function tries to return TARGET but does not promise to.  When TARGET
2131    is not used the value lives in a pseudo-reg of mode TMODE.  */
2132
2133 static rtx
2134 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2135                          opt_scalar_int_mode op0_mode,
2136                          unsigned HOST_WIDE_INT bitsize,
2137                          unsigned HOST_WIDE_INT bitnum, rtx target,
2138                          int unsignedp, bool reverse)
2139 {
2140   scalar_int_mode mode;
2141
2142   if (!MEM_P (op0))
2143     /* A non-memory operand is accessed in its own mode.  */
2144     mode = op0_mode.require ();
2145   else if (get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2146                           BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2147     /* Access the memory in MODE; the call also updates BITNUM.  */
2148     op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2149   else
2150     /* No single mode was found, which should only happen when the
2151        field spans word boundaries.  */
2152     return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2153                                     unsignedp, reverse);
2154
2155   return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2156                                     target, unsignedp, reverse);
2157 }
2158
2159 /* Helper function for extract_fixed_bit_field, extracts
2160    the bit field always using MODE, which is the mode of OP0.
2161    The other arguments are as for extract_fixed_bit_field.
        An unsigned field is produced with a right shift followed by an
        AND mask; a signed field with a left shift followed by an
        arithmetic right shift.  */
2162
2163 static rtx
2164 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2165                            unsigned HOST_WIDE_INT bitsize,
2166                            unsigned HOST_WIDE_INT bitnum, rtx target,
2167                            int unsignedp, bool reverse)
2168 {
2169   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2170      for invalid input, such as extract equivalent of f5 from
2171      gcc.dg/pr48335-2.c.  */
2172
2173   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2174     /* BITNUM is the distance between our msb and that of OP0.
2175        Convert it to the distance from the lsb.  */
2176     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2177
2178   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2179      We have reduced the big-endian case to the little-endian case.  */
2180   if (reverse)
2181     op0 = flip_storage_order (mode, op0);
2182
2183   if (unsignedp)
2184     {
2185       if (bitnum)
2186         {
2187           /* If the field does not already start at the lsb,
2188              shift it so it does.  */
2189           /* Maybe propagate the target for the shift.  */
2190           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
               /* The shift is done in MODE, so TARGET is only offered to
                  it when TMODE equals MODE.  */
2191           if (tmode != mode)
2192             subtarget = 0;
2193           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2194         }
2195       /* Convert the value to the desired mode.  TMODE must also be a
2196          scalar integer for this conversion to make sense, since we
2197          shouldn't reinterpret the bits.  */
2198       scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2199       if (mode != new_mode)
2200         op0 = convert_to_mode (new_mode, op0, 1);
2201
2202       /* Unless the msb of the field used to be the msb when we shifted,
2203          mask out the upper bits.  */
2204
2205       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2206         return expand_binop (new_mode, and_optab, op0,
2207                              mask_rtx (new_mode, 0, bitsize, 0),
2208                              target, 1, OPTAB_LIB_WIDEN);
2209       return op0;
2210     }
2211
2212   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2213      then arithmetic-shift its lsb to the lsb of the word.  */
2214   op0 = force_reg (mode, op0);
2215
2216   /* Find the narrowest integer mode that contains the field.  */
2217
2218   opt_scalar_int_mode mode_iter;
2219   FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2220     if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2221       break;
2222
       /* From here on MODE is the mode found above, not the mode OP0 had
          on entry.  */
2223   mode = mode_iter.require ();
2224   op0 = convert_to_mode (mode, op0, 0);
2225
       /* TARGET is only passed on to the shifts when MODE equals TMODE.  */
2226   if (mode != tmode)
2227     target = 0;
2228
2229   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2230     {
2231       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2232       /* Maybe propagate the target for the shift.  */
2233       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2234       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2235     }
2236
2237   return expand_shift (RSHIFT_EXPR, mode, op0,
2238                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2239 }
2240
2241 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2242    VALUE << BITPOS.
        MODE is the machine mode handed to immed_wide_int_const for the
        resulting constant.  The shift itself is computed by wi::lshift on
        VALUE before the constant is created.  */
2243
2244 static rtx
2245 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2246               int bitpos)
2247 {
2248   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2249 }
2250 \f
2251 /* Extract a bit field that is split across two words
2252    and return an RTX for the result.
2253
2254    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2255    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2256    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2257    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2258    a BLKmode MEM.
2259
2260    If REVERSE is true, the extraction is to be done in reverse order.

        The field is assembled in word_mode: each piece is extracted as an
        unsigned value with extract_fixed_bit_field, shifted into its final
        position and combined with the pieces gathered so far using IOR.
        For a signed field the combined value is then sign-extended with a
        left shift followed by an arithmetic right shift.  */
2261
2262 static rtx
2263 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2264                          unsigned HOST_WIDE_INT bitsize,
2265                          unsigned HOST_WIDE_INT bitpos, int unsignedp,
2266                          bool reverse)
2267 {
2268   unsigned int unit;
2269   unsigned int bitsdone = 0;  /* Number of field bits extracted so far.  */
2270   rtx result = NULL_RTX;
2271   int first = 1;  /* Nonzero until the first piece has been stored in RESULT.  */
2272
2273   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2274      much at a time.  */
2275   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2276     unit = BITS_PER_WORD;
2277   else
2278     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2279
       /* Each iteration extracts one piece that lies entirely within a
          single UNIT and merges it into RESULT.  */
2280   while (bitsdone < bitsize)
2281     {
2282       unsigned HOST_WIDE_INT thissize;
2283       rtx part;
2284       unsigned HOST_WIDE_INT thispos;
2285       unsigned HOST_WIDE_INT offset;
2286
           /* OFFSET is the number of whole UNITs that precede this piece;
              THISPOS is the bit position of the piece within its UNIT.  */
2287       offset = (bitpos + bitsdone) / unit;
2288       thispos = (bitpos + bitsdone) % unit;
2289
2290       /* THISSIZE must not overrun a word boundary.  Otherwise,
2291          extract_fixed_bit_field will call us again, and we will mutually
2292          recurse forever.  */
2293       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2294       thissize = MIN (thissize, unit - thispos);
2295
2296       /* If OP0 is a register, then handle OFFSET here.  */
2297       rtx op0_piece = op0;
2298       opt_scalar_int_mode op0_piece_mode = op0_mode;
2299       if (SUBREG_P (op0) || REG_P (op0))
2300         {
               /* Word number OFFSET has been selected, so the bit position
                  passed below is relative to that word (OFFSET becomes 0).  */
2301           op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2302           op0_piece_mode = word_mode;
2303           offset = 0;
2304         }
2305
2306       /* Extract the parts in bit-counting order,
2307          whose meaning is determined by BYTES_PER_UNIT.
2308          OFFSET is in UNITs, and UNIT is in bits.  */
2309       part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2310                                       thissize, offset * unit + thispos,
2311                                       0, 1, reverse);
2312       bitsdone += thissize;
2313
2314       /* Shift this part into place for the result.  */
2315       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2316         {
               /* BITSDONE already includes this piece, so the shift count is
                  the number of field bits still to be extracted.  */
2317           if (bitsize != bitsdone)
2318             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2319                                  bitsize - bitsdone, 0, 1);
2320         }
2321       else
2322         {
               /* The shift count is the number of field bits extracted
                  before this piece.  */
2323           if (bitsdone != thissize)
2324             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2325                                  bitsdone - thissize, 0, 1);
2326         }
2327
2328       if (first)
2329         result = part;
2330       else
2331         /* Combine the parts with bitwise or.  This works
2332            because we extracted each part as an unsigned bit field.  */
2333         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2334                                OPTAB_LIB_WIDEN);
2335
2336       first = 0;
2337     }
2338
2339   /* Unsigned bit field: we are done.  */
2340   if (unsignedp)
2341     return result;
2342   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2343   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2344                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2345   return expand_shift (RSHIFT_EXPR, word_mode, result,
2346                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2347 }
2348 \f
2349 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2350    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2351    MODE, fill the upper bits with zeros.  Fail if the layout of either
2352    mode is unknown (as for CC modes) or if the extraction would involve
2353    unprofitable mode punning.  Return the value on success, otherwise
2354    return null.
2355
2356    This is different from gen_lowpart* in these respects:
2357
2358      - the returned value must always be considered an rvalue
2359
2360      - when MODE is wider than SRC_MODE, the extraction involves
2361        a zero extension
2362
2363      - when MODE is smaller than SRC_MODE, the extraction involves
2364        a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2365
2366    In other words, this routine performs a computation, whereas the
2367    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2368    operations.  */
2369
2370 rtx
2371 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2372 {
2373   scalar_int_mode int_mode, src_int_mode;
2374
2375   if (mode == src_mode)
2376     return src;
2377
2378   if (CONSTANT_P (src))
2379     {
2380       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2381          fails, it will happily create (subreg (symbol_ref)) or similar
2382          invalid SUBREGs.  */
2383       poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2384       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2385       if (ret)
2386         return ret;
2387
2388       if (GET_MODE (src) == VOIDmode
2389           || !validate_subreg (mode, src_mode, src, byte))
2390         return NULL_RTX;
2391
2392       src = force_reg (GET_MODE (src), src);
2393       return gen_rtx_SUBREG (mode, src, byte);
2394     }
2395
2396   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2397     return NULL_RTX;
2398
2399   if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2400       && targetm.modes_tieable_p (mode, src_mode))
2401     {
2402       rtx x = gen_lowpart_common (mode, src);
2403       if (x)
2404         return x;
2405     }
2406
2407   if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2408       || !int_mode_for_mode (mode).exists (&int_mode))
2409     return NULL_RTX;
2410
2411   if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2412     return NULL_RTX;
2413   if (!targetm.modes_tieable_p (int_mode, mode))
2414     return NULL_RTX;
2415
2416   src = gen_lowpart (src_int_mode, src);
2417   if (!validate_subreg (int_mode, src_int_mode, src,
2418                         subreg_lowpart_offset (int_mode, src_int_mode)))
2419     return NULL_RTX;
2420
2421   src = convert_modes (int_mode, src_int_mode, src, true);
2422   src = gen_lowpart (mode, src);
2423   return src;
2424 }
2425 \f
2426 /* Add INC into TARGET.  */
2427
2428 void
2429 expand_inc (rtx target, rtx inc)
2430 {
2431   rtx value = expand_binop (GET_MODE (target), add_optab,
2432                             target, inc,
2433                             target, 0, OPTAB_LIB_WIDEN);
2434   if (value != target)
2435     emit_move_insn (target, value);
2436 }
2437
2438 /* Subtract DEC from TARGET.  */
2439
2440 void
2441 expand_dec (rtx target, rtx dec)
2442 {
2443   rtx value = expand_binop (GET_MODE (target), sub_optab,
2444                             target, dec,
2445                             target, 0, OPTAB_LIB_WIDEN);
2446   if (value != target)
2447     emit_move_insn (target, value);
2448 }
2449 \f
2450 /* Output a shift instruction for expression code CODE,
2451    with SHIFTED being the rtx for the value to shift,
2452    and AMOUNT the rtx for the amount to shift by.
2453    Store the result in the rtx TARGET, if that is convenient.
2454    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2455    Return the rtx for where the value is.
2456    If that cannot be done, abort the compilation unless MAY_FAIL is true,
2457    in which case 0 is returned.

        MODE is the mode in which the operation is carried out; it may be a
        vector mode, in which case the element mode is used for the bitsize
        checks below.  The code distinguishes LSHIFT_EXPR, LROTATE_EXPR and
        RROTATE_EXPR; any other CODE is treated as a right shift.  */
2458
2459 static rtx
2460 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2461                 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2462 {
2463   rtx op1, temp = 0;
2464   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2465   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2466   optab lshift_optab = ashl_optab;
2467   optab rshift_arith_optab = ashr_optab;
2468   optab rshift_uns_optab = lshr_optab;
2469   optab lrotate_optab = rotl_optab;
2470   optab rrotate_optab = rotr_optab;
2471   machine_mode op1_mode;
2472   scalar_mode scalar_mode = GET_MODE_INNER (mode);
2473   int attempt;
2474   bool speed = optimize_insn_for_speed_p ();
2475
2476   op1 = amount;
2477   op1_mode = GET_MODE (op1);
2478
2479   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2480      shift amount is a vector, use the vector/vector shift patterns.  */
2481   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2482     {
2483       lshift_optab = vashl_optab;
2484       rshift_arith_optab = vashr_optab;
2485       rshift_uns_optab = vlshr_optab;
2486       lrotate_optab = vrotl_optab;
2487       rrotate_optab = vrotr_optab;
2488     }
2489
2490   /* Previously detected shift-counts computed by NEGATE_EXPR
2491      and shifted in the other direction; but that does not work
2492      on all machines.  */
2493
       /* When SHIFT_COUNT_TRUNCATED is set, reduce a constant count that is
          not below the element bitsize modulo that bitsize, and replace a
          lowpart SUBREG count between scalar integer modes by its inner
          operand.  */
2494   if (SHIFT_COUNT_TRUNCATED)
2495     {
2496       if (CONST_INT_P (op1)
2497           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2498               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2499         op1 = gen_int_shift_amount (mode,
2500                                     (unsigned HOST_WIDE_INT) INTVAL (op1)
2501                                     % GET_MODE_BITSIZE (scalar_mode));
2502       else if (GET_CODE (op1) == SUBREG
2503                && subreg_lowpart_p (op1)
2504                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2505                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2506         op1 = SUBREG_REG (op1);
2507     }
2508
2509   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2510      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2511      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2512      amount instead.  */
2513   if (rotate
2514       && CONST_INT_P (op1)
2515       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2516                    GET_MODE_BITSIZE (scalar_mode) - 1))
2517     {
2518       op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2519                                          - INTVAL (op1)));
2520       left = !left;
2521       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2522     }
2523
2524   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2525      Note that this is not the case for bigger values.  For instance a rotation
2526      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2527      0x04030201 (bswapsi).  */
2528   if (rotate
2529       && CONST_INT_P (op1)
2530       && INTVAL (op1) == BITS_PER_UNIT
2531       && GET_MODE_SIZE (scalar_mode) == 2
2532       && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2533     return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2534
       /* A shift or rotate by zero leaves the operand unchanged.  */
2535   if (op1 == const0_rtx)
2536     return shifted;
2537
2538   /* Check whether it's cheaper to implement a left shift by a constant
2539      bit count by a sequence of additions.  */
2540   if (code == LSHIFT_EXPR
2541       && CONST_INT_P (op1)
2542       && INTVAL (op1) > 0
2543       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2544       && INTVAL (op1) < MAX_BITS_PER_WORD
2545       && (shift_cost (speed, mode, INTVAL (op1))
2546           > INTVAL (op1) * add_cost (speed, mode))
2547       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2548     {
2549       int i;
           /* Each addition of the value to itself doubles it, i.e. shifts
              it left by one bit; repeat once per bit of the count.  */
2550       for (i = 0; i < INTVAL (op1); i++)
2551         {
2552           temp = force_reg (mode, shifted);
2553           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2554                                   unsignedp, OPTAB_LIB_WIDEN);
2555         }
2556       return shifted;
2557     }
2558
       /* Try OPTAB_DIRECT, then OPTAB_WIDEN, then OPTAB_LIB_WIDEN, stopping
          as soon as one attempt yields a result in TEMP.  */
2559   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2560     {
2561       enum optab_methods methods;
2562
2563       if (attempt == 0)
2564         methods = OPTAB_DIRECT;
2565       else if (attempt == 1)
2566         methods = OPTAB_WIDEN;
2567       else
2568         methods = OPTAB_LIB_WIDEN;
2569
2570       if (rotate)
2571         {
2572           /* Widening does not work for rotation.  */
2573           if (methods == OPTAB_WIDEN)
2574             continue;
2575           else if (methods == OPTAB_LIB_WIDEN)
2576             {
2577               /* If we have been unable to open-code this by a rotation,
2578                  do it as the IOR of two shifts.  I.e., to rotate A
2579                  by N bits, compute
2580                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2581                  where C is the bitsize of A.
2582
2583                  It is theoretically possible that the target machine might
2584                  not be able to perform either shift and hence we would
2585                  be making two libcalls rather than just the one for the
2586                  shift (similarly if IOR could not be done).  We will allow
2587                  this extremely unlikely lossage to avoid complicating the
2588                  code below.  */
2589
2590               rtx subtarget = target == shifted ? 0 : target;
2591               rtx new_amount, other_amount;
2592               rtx temp1;
2593
2594               new_amount = op1;
                   /* NOTE(review): OP1 == const0_rtx already returned before
                      the loop and OP1 is not modified afterwards, so this
                      test looks redundant -- confirm before removing.  */
2595               if (op1 == const0_rtx)
2596                 return shifted;
2597               else if (CONST_INT_P (op1))
2598                 other_amount = gen_int_shift_amount
2599                   (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2600               else
2601                 {
                       /* Variable count: the opposite shift uses
                          (-OP1) & (precision - 1).  */
2602                   other_amount
2603                     = simplify_gen_unary (NEG, GET_MODE (op1),
2604                                           op1, GET_MODE (op1));
2605                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2606                   other_amount
2607                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2608                                            gen_int_mode (mask, GET_MODE (op1)));
2609                 }
2610
2611               shifted = force_reg (mode, shifted);
2612
                   /* Both halves are logical shifts (UNSIGNEDP is passed
                      as 1), in opposite directions.  */
2613               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2614                                      mode, shifted, new_amount, 0, 1);
2615               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2616                                       mode, shifted, other_amount,
2617                                       subtarget, 1);
2618               return expand_binop (mode, ior_optab, temp, temp1, target,
2619                                    unsignedp, methods);
2620             }
2621
2622           temp = expand_binop (mode,
2623                                left ? lrotate_optab : rrotate_optab,
2624                                shifted, op1, target, unsignedp, methods);
2625         }
2626       else if (unsignedp)
2627         temp = expand_binop (mode,
2628                              left ? lshift_optab : rshift_uns_optab,
2629                              shifted, op1, target, unsignedp, methods);
2630
2631       /* Do arithmetic shifts.
2632          Also, if we are going to widen the operand, we can just as well
2633          use an arithmetic right-shift instead of a logical one.  */
2634       if (temp == 0 && ! rotate
2635           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2636         {
2637           enum optab_methods methods1 = methods;
2638
2639           /* If trying to widen a log shift to an arithmetic shift,
2640              don't accept an arithmetic shift of the same size.  */
2641           if (unsignedp)
2642             methods1 = OPTAB_MUST_WIDEN;
2643
2644           /* Arithmetic shift */
2645
2646           temp = expand_binop (mode,
2647                                left ? lshift_optab : rshift_arith_optab,
2648                                shifted, op1, target, unsignedp, methods1);
2649         }
2650
2651       /* We used to try extzv here for logical right shifts, but that was
2652          only useful for one machine, the VAX, and caused poor code
2653          generation there for lshrdi3, so the code was deleted and a
2654          define_expand for lshrsi3 was added to vax.md.  */
2655     }
2656
       /* A null result is tolerated only when the caller passed MAY_FAIL.  */
2657   gcc_assert (temp != NULL_RTX || may_fail);
2658   return temp;
2659 }
2660
2661 /* Output a shift instruction for expression code CODE,
2662    with SHIFTED being the rtx for the value to shift,
2663    and AMOUNT the amount to shift by.
2664    Store the result in the rtx TARGET, if that is convenient.
2665    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2666    Return the rtx for where the value is.  */
2667
2668 rtx
2669 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2670               poly_int64 amount, rtx target, int unsignedp)
2671 {
2672   return expand_shift_1 (code, mode, shifted,
2673                          gen_int_shift_amount (mode, amount),
2674                          target, unsignedp);
2675 }
2676
2677 /* Likewise, but return 0 if that cannot be done.  */
2678
2679 static rtx
2680 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2681                     int amount, rtx target, int unsignedp)
2682 {
2683   return expand_shift_1 (code, mode,
2684                          shifted, GEN_INT (amount), target, unsignedp, true);
2685 }
2686
2687 /* Output a shift instruction for expression code CODE,
2688    with SHIFTED being the rtx for the value to shift,
2689    and AMOUNT the tree for the amount to shift by.
2690    Store the result in the rtx TARGET, if that is convenient.
2691    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2692    Return the rtx for where the value is.  */
2693
2694 rtx
2695 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2696                        tree amount, rtx target, int unsignedp)
2697 {
2698   return expand_shift_1 (code, mode,
2699                          shifted, expand_normal (amount), target, unsignedp);
2700 }
2701
2702 \f
     /* Forward declarations of file-local (static) helper functions, so
        that they can be referenced before their definitions.  */
2703 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2704                         const struct mult_cost *, machine_mode mode);
2705 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2706                               const struct algorithm *, enum mult_variant);
2707 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2708 static rtx extract_high_half (scalar_int_mode, rtx);
2709 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2710 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2711                                        int, int);
2712 /* Compute the best algorithm for multiplying by T and store it in
2713    *ALG_OUT.  The algorithm must cost less than COST_LIMIT.
2714    If ALG_OUT->cost >= COST_LIMIT, no algorithm was found and all
2715    other fields of *ALG_OUT are undefined.
2716    MODE is the machine mode of the multiplication.  */
2717
2718 static void
2719 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2720             const struct mult_cost *cost_limit, machine_mode mode)
2721 {
2722   int m;
2723   struct algorithm *alg_in, *best_alg;
2724   struct mult_cost best_cost;
2725   struct mult_cost new_limit;
2726   int op_cost, op_latency;
2727   unsigned HOST_WIDE_INT orig_t = t;
2728   unsigned HOST_WIDE_INT q;
2729   int maxm, hash_index;
2730   bool cache_hit = false;
2731   enum alg_code cache_alg = alg_zero;
2732   bool speed = optimize_insn_for_speed_p ();
2733   scalar_int_mode imode;
2734   struct alg_hash_entry *entry_ptr;
2735
2736   /* Indicate that no algorithm is yet found.  If no algorithm
2737      is found, this value will be returned and indicate failure.  */
2738   alg_out->cost.cost = cost_limit->cost + 1;
2739   alg_out->cost.latency = cost_limit->latency + 1;
2740
2741   if (cost_limit->cost < 0
2742       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2743     return;
2744
2745   /* Be prepared for vector modes.  */
2746   imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2747
2748   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2749
2750   /* Restrict the bits of "t" to the multiplication's mode.  */
2751   t &= GET_MODE_MASK (imode);
2752
2753   /* t == 1 can be done in zero cost.  */
2754   if (t == 1)
2755     {
2756       alg_out->ops = 1;
2757       alg_out->cost.cost = 0;
2758       alg_out->cost.latency = 0;
2759       alg_out->op[0] = alg_m;
2760       return;
2761     }
2762
2763   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2764      fail now.  */
2765   if (t == 0)
2766     {
2767       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2768         return;
2769       else
2770         {
2771           alg_out->ops = 1;
2772           alg_out->cost.cost = zero_cost (speed);
2773           alg_out->cost.latency = zero_cost (speed);
2774           alg_out->op[0] = alg_zero;
2775           return;
2776         }
2777     }
2778
2779   /* We'll be needing a couple extra algorithm structures now.  */
2780
2781   alg_in = XALLOCA (struct algorithm);
2782   best_alg = XALLOCA (struct algorithm);
2783   best_cost = *cost_limit;
2784
2785   /* Compute the hash index.  */
2786   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2787
2788   /* See if we already know what to do for T.  */
2789   entry_ptr = alg_hash_entry_ptr (hash_index);
2790   if (entry_ptr->t == t
2791       && entry_ptr->mode == mode
2792       && entry_ptr->speed == speed
2793       && entry_ptr->alg != alg_unknown)
2794     {
2795       cache_alg = entry_ptr->alg;
2796
2797       if (cache_alg == alg_impossible)
2798         {
2799           /* The cache tells us that it's impossible to synthesize
2800              multiplication by T within entry_ptr->cost.  */
2801           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2802             /* COST_LIMIT is at least as restrictive as the one
2803                recorded in the hash table, in which case we have no
2804                hope of synthesizing a multiplication.  Just
2805                return.  */
2806             return;
2807
2808           /* If we get here, COST_LIMIT is less restrictive than the
2809              one recorded in the hash table, so we may be able to
2810              synthesize a multiplication.  Proceed as if we didn't
2811              have the cache entry.  */
2812         }
2813       else
2814         {
2815           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2816             /* The cached algorithm shows that this multiplication
2817                requires more cost than COST_LIMIT.  Just return.  This
2818                way, we don't clobber this cache entry with
2819                alg_impossible but retain useful information.  */
2820             return;
2821
2822           cache_hit = true;
2823
2824           switch (cache_alg)
2825             {
2826             case alg_shift:
2827               goto do_alg_shift;
2828
2829             case alg_add_t_m2:
2830             case alg_sub_t_m2:
2831               goto do_alg_addsub_t_m2;
2832
2833             case alg_add_factor:
2834             case alg_sub_factor:
2835               goto do_alg_addsub_factor;
2836
2837             case alg_add_t2_m:
2838               goto do_alg_add_t2_m;
2839
2840             case alg_sub_t2_m:
2841               goto do_alg_sub_t2_m;
2842
2843             default:
2844               gcc_unreachable ();
2845             }
2846         }
2847     }
2848
2849   /* If we have a group of zero bits at the low-order part of T, try
2850      multiplying by the remaining bits and then doing a shift.  */
2851
2852   if ((t & 1) == 0)
2853     {
2854     do_alg_shift:
2855       m = ctz_or_zero (t); /* m = number of low zero bits */
2856       if (m < maxm)
2857         {
2858           q = t >> m;
2859           /* The function expand_shift will choose between a shift and
2860              a sequence of additions, so the observed cost is given as
2861              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2862           op_cost = m * add_cost (speed, mode);
2863           if (shift_cost (speed, mode, m) < op_cost)
2864             op_cost = shift_cost (speed, mode, m);
2865           new_limit.cost = best_cost.cost - op_cost;
2866           new_limit.latency = best_cost.latency - op_cost;
2867           synth_mult (alg_in, q, &new_limit, mode);
2868
2869           alg_in->cost.cost += op_cost;
2870           alg_in->cost.latency += op_cost;
2871           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2872             {
2873               best_cost = alg_in->cost;
2874               std::swap (alg_in, best_alg);
2875               best_alg->log[best_alg->ops] = m;
2876               best_alg->op[best_alg->ops] = alg_shift;
2877             }
2878
2879           /* See if treating ORIG_T as a signed number yields a better
2880              sequence.  Try this sequence only for a negative ORIG_T
2881              as it would be useless for a non-negative ORIG_T.  */
2882           if ((HOST_WIDE_INT) orig_t < 0)
2883             {
2884               /* Shift ORIG_T as follows because a right shift of a
2885                  negative-valued signed type is implementation
2886                  defined.  */
2887               q = ~(~orig_t >> m);
2888               /* The function expand_shift will choose between a shift
2889                  and a sequence of additions, so the observed cost is
2890                  given as MIN (m * add_cost(speed, mode),
2891                  shift_cost(speed, mode, m)).  */
2892               op_cost = m * add_cost (speed, mode);
2893               if (shift_cost (speed, mode, m) < op_cost)
2894                 op_cost = shift_cost (speed, mode, m);
2895               new_limit.cost = best_cost.cost - op_cost;
2896               new_limit.latency = best_cost.latency - op_cost;
2897               synth_mult (alg_in, q, &new_limit, mode);
2898
2899               alg_in->cost.cost += op_cost;
2900               alg_in->cost.latency += op_cost;
2901               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2902                 {
2903                   best_cost = alg_in->cost;
2904                   std::swap (alg_in, best_alg);
2905                   best_alg->log[best_alg->ops] = m;
2906                   best_alg->op[best_alg->ops] = alg_shift;
2907                 }
2908             }
2909         }
2910       if (cache_hit)
2911         goto done;
2912     }
2913
2914   /* If we have an odd number, add or subtract one.  */
2915   if ((t & 1) != 0)
2916     {
2917       unsigned HOST_WIDE_INT w;
2918
2919     do_alg_addsub_t_m2:
2920       for (w = 1; (w & t) != 0; w <<= 1)
2921         ;
2922       /* If T was -1, then W will be zero after the loop.  This is another
2923          case where T ends with ...111.  Handling this with (T + 1) and
2924          subtract 1 produces slightly better code and results in algorithm
2925          selection much faster than treating it like the ...0111 case
2926          below.  */
2927       if (w == 0
2928           || (w > 2
2929               /* Reject the case where t is 3.
2930                  Thus we prefer addition in that case.  */
2931               && t != 3))
2932         {
2933           /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */
2934
2935           op_cost = add_cost (speed, mode);
2936           new_limit.cost = best_cost.cost - op_cost;
2937           new_limit.latency = best_cost.latency - op_cost;
2938           synth_mult (alg_in, t + 1, &new_limit, mode);
2939
2940           alg_in->cost.cost += op_cost;
2941           alg_in->cost.latency += op_cost;
2942           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2943             {
2944               best_cost = alg_in->cost;
2945               std::swap (alg_in, best_alg);
2946               best_alg->log[best_alg->ops] = 0;
2947               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2948             }
2949         }
2950       else
2951         {
2952           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */
2953
2954           op_cost = add_cost (speed, mode);
2955           new_limit.cost = best_cost.cost - op_cost;
2956           new_limit.latency = best_cost.latency - op_cost;
2957           synth_mult (alg_in, t - 1, &new_limit, mode);
2958
2959           alg_in->cost.cost += op_cost;
2960           alg_in->cost.latency += op_cost;
2961           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2962             {
2963               best_cost = alg_in->cost;
2964               std::swap (alg_in, best_alg);
2965               best_alg->log[best_alg->ops] = 0;
2966               best_alg->op[best_alg->ops] = alg_add_t_m2;
2967             }
2968         }
2969
2970       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2971          quickly with a - a * n for some appropriate constant n.  */
2972       m = exact_log2 (-orig_t + 1);
2973       if (m >= 0 && m < maxm)
2974         {
2975           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2976           /* If the target has a cheap shift-and-subtract insn use
2977              that in preference to a shift insn followed by a sub insn.
2978              Assume that the shift-and-sub is "atomic" with a latency
2979              equal to it's cost, otherwise assume that on superscalar
2980              hardware the shift may be executed concurrently with the
2981              earlier steps in the algorithm.  */
2982           if (shiftsub1_cost (speed, mode, m) <= op_cost)
2983             {
2984               op_cost = shiftsub1_cost (speed, mode, m);
2985               op_latency = op_cost;
2986             }
2987           else
2988             op_latency = add_cost (speed, mode);
2989
2990           new_limit.cost = best_cost.cost - op_cost;
2991           new_limit.latency = best_cost.latency - op_latency;
2992           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2993                       &new_limit, mode);
2994
2995           alg_in->cost.cost += op_cost;
2996           alg_in->cost.latency += op_latency;
2997           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2998             {
2999               best_cost = alg_in->cost;
3000               std::swap (alg_in, best_alg);
3001               best_alg->log[best_alg->ops] = m;
3002               best_alg->op[best_alg->ops] = alg_sub_t_m2;
3003             }
3004         }
3005
3006       if (cache_hit)
3007         goto done;
3008     }
3009
3010   /* Look for factors of t of the form
3011      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3012      If we find such a factor, we can multiply by t using an algorithm that
3013      multiplies by q, shift the result by m and add/subtract it to itself.
3014
3015      We search for large factors first and loop down, even if large factors
3016      are less probable than small; if we find a large factor we will find a
3017      good sequence quickly, and therefore be able to prune (by decreasing
3018      COST_LIMIT) the search.  */
3019
3020  do_alg_addsub_factor:
3021   for (m = floor_log2 (t - 1); m >= 2; m--)
3022     {
3023       unsigned HOST_WIDE_INT d;
3024
3025       d = (HOST_WIDE_INT_1U << m) + 1;
3026       if (t % d == 0 && t > d && m < maxm
3027           && (!cache_hit || cache_alg == alg_add_factor))
3028         {
3029           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3030           if (shiftadd_cost (speed, mode, m) <= op_cost)
3031             op_cost = shiftadd_cost (speed, mode, m);
3032
3033           op_latency = op_cost;
3034
3035
3036           new_limit.cost = best_cost.cost - op_cost;
3037           new_limit.latency = best_cost.latency - op_latency;
3038           synth_mult (alg_in, t / d, &new_limit, mode);
3039
3040           alg_in->cost.cost += op_cost;
3041           alg_in->cost.latency += op_latency;
3042           if (alg_in->cost.latency < op_cost)
3043             alg_in->cost.latency = op_cost;
3044           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3045             {
3046               best_cost = alg_in->cost;
3047               std::swap (alg_in, best_alg);
3048               best_alg->log[best_alg->ops] = m;
3049               best_alg->op[best_alg->ops] = alg_add_factor;
3050             }
3051           /* Other factors will have been taken care of in the recursion.  */
3052           break;
3053         }
3054
3055       d = (HOST_WIDE_INT_1U << m) - 1;
3056       if (t % d == 0 && t > d && m < maxm
3057           && (!cache_hit || cache_alg == alg_sub_factor))
3058         {
3059           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3060           if (shiftsub0_cost (speed, mode, m) <= op_cost)
3061             op_cost = shiftsub0_cost (speed, mode, m);
3062
3063           op_latency = op_cost;
3064
3065           new_limit.cost = best_cost.cost - op_cost;
3066           new_limit.latency = best_cost.latency - op_latency;
3067           synth_mult (alg_in, t / d, &new_limit, mode);
3068
3069           alg_in->cost.cost += op_cost;
3070           alg_in->cost.latency += op_latency;
3071           if (alg_in->cost.latency < op_cost)
3072             alg_in->cost.latency = op_cost;
3073           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3074             {
3075               best_cost = alg_in->cost;
3076               std::swap (alg_in, best_alg);
3077               best_alg->log[best_alg->ops] = m;
3078               best_alg->op[best_alg->ops] = alg_sub_factor;
3079             }
3080           break;
3081         }
3082     }
3083   if (cache_hit)
3084     goto done;
3085
3086   /* Try shift-and-add (load effective address) instructions,
3087      i.e. do a*3, a*5, a*9.  */
3088   if ((t & 1) != 0)
3089     {
3090     do_alg_add_t2_m:
3091       q = t - 1;
3092       m = ctz_hwi (q);
3093       if (q && m < maxm)
3094         {
3095           op_cost = shiftadd_cost (speed, mode, m);
3096           new_limit.cost = best_cost.cost - op_cost;
3097           new_limit.latency = best_cost.latency - op_cost;
3098           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3099
3100           alg_in->cost.cost += op_cost;
3101           alg_in->cost.latency += op_cost;
3102           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3103             {
3104               best_cost = alg_in->cost;
3105               std::swap (alg_in, best_alg);
3106               best_alg->log[best_alg->ops] = m;
3107               best_alg->op[best_alg->ops] = alg_add_t2_m;
3108             }
3109         }
3110       if (cache_hit)
3111         goto done;
3112
3113     do_alg_sub_t2_m:
3114       q = t + 1;
3115       m = ctz_hwi (q);
3116       if (q && m < maxm)
3117         {
3118           op_cost = shiftsub0_cost (speed, mode, m);
3119           new_limit.cost = best_cost.cost - op_cost;
3120           new_limit.latency = best_cost.latency - op_cost;
3121           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3122
3123           alg_in->cost.cost += op_cost;
3124           alg_in->cost.latency += op_cost;
3125           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3126             {
3127               best_cost = alg_in->cost;
3128               std::swap (alg_in, best_alg);
3129               best_alg->log[best_alg->ops] = m;
3130               best_alg->op[best_alg->ops] = alg_sub_t2_m;
3131             }
3132         }
3133       if (cache_hit)
3134         goto done;
3135     }
3136
3137  done:
3138   /* If best_cost has not decreased, we have not found any algorithm.  */
3139   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3140     {
3141       /* We failed to find an algorithm.  Record alg_impossible for
3142          this case (that is, <T, MODE, COST_LIMIT>) so that next time
3143          we are asked to find an algorithm for T within the same or
3144          lower COST_LIMIT, we can immediately return to the
3145          caller.  */
3146       entry_ptr->t = t;
3147       entry_ptr->mode = mode;
3148       entry_ptr->speed = speed;
3149       entry_ptr->alg = alg_impossible;
3150       entry_ptr->cost = *cost_limit;
3151       return;
3152     }
3153
3154   /* Cache the result.  */
3155   if (!cache_hit)
3156     {
3157       entry_ptr->t = t;
3158       entry_ptr->mode = mode;
3159       entry_ptr->speed = speed;
3160       entry_ptr->alg = best_alg->op[best_alg->ops];
3161       entry_ptr->cost.cost = best_cost.cost;
3162       entry_ptr->cost.latency = best_cost.latency;
3163     }
3164
3165   /* If we are getting a too long sequence for `struct algorithm'
3166      to record, make this search fail.  */
3167   if (best_alg->ops == MAX_BITS_PER_WORD)
3168     return;
3169
3170   /* Copy the algorithm from temporary space to the space at alg_out.
3171      We avoid using structure assignment because the majority of
3172      best_alg is normally undefined, and this is a critical function.  */
3173   alg_out->ops = best_alg->ops + 1;
3174   alg_out->cost = best_cost;
3175   memcpy (alg_out->op, best_alg->op,
3176           alg_out->ops * sizeof *alg_out->op);
3177   memcpy (alg_out->log, best_alg->log,
3178           alg_out->ops * sizeof *alg_out->log);
3179 }
3180 \f
3181 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3182    Try three variations:
3183
3184        - a shift/add sequence based on VAL itself
3185        - a shift/add sequence based on -VAL, followed by a negation
3186        - a shift/add sequence based on VAL - 1, followed by an addition.
3187
3188    Return true if the cheapest of these costs less than MULT_COST,
3189    describing the algorithm in *ALG and final fixup in *VARIANT.  Return false without searching if MULT_COST is negative.  */
3190
3191 bool
3192 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3193                      struct algorithm *alg, enum mult_variant *variant,
3194                      int mult_cost)
3195 {
3196   struct algorithm alg2;  /* Candidate from the -VAL and VAL - 1 searches.  */
3197   struct mult_cost limit;  /* Cost bound handed to each synth_mult call.  */
3198   int op_cost;  /* The upper bound first, then the cost of each fixup insn.  */
3199   bool speed = optimize_insn_for_speed_p ();
3200
3201   /* Fail quickly for impossible bounds.  */
3202   if (mult_cost < 0)
3203     return false;
3204
3205   /* Ensure that mult_cost provides a reasonable upper bound.
3206      Any constant multiplication can be performed with less
3207      than 2 * bits additions.  */
3208   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3209   if (mult_cost > op_cost)
3210     mult_cost = op_cost;
3211
3212   *variant = basic_variant;
3213   limit.cost = mult_cost;
3214   limit.latency = mult_cost;
3215   synth_mult (alg, val, &limit, mode);  /* Variation 1: VAL itself.  */
3216
3217   /* This works only if the inverted value actually fits in an
3218      `unsigned int'.  */
3219   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3220     {
3221       op_cost = neg_cost (speed, mode);  /* The negation is the fixup here.  */
3222       if (MULT_COST_LESS (&alg->cost, mult_cost))  /* Bound by the best result so far, else by MULT_COST.  */
3223         {
3224           limit.cost = alg->cost.cost - op_cost;
3225           limit.latency = alg->cost.latency - op_cost;
3226         }
3227       else
3228         {
3229           limit.cost = mult_cost - op_cost;
3230           limit.latency = mult_cost - op_cost;
3231         }
3232
3233       synth_mult (&alg2, -val, &limit, mode);  /* Variation 2: -VAL, then negate.  */
3234       alg2.cost.cost += op_cost;
3235       alg2.cost.latency += op_cost;
3236       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3237         *alg = alg2, *variant = negate_variant;
3238     }
3239
3240   /* This proves very useful for division-by-constant.  */
3241   op_cost = add_cost (speed, mode);  /* The final addition is the fixup here.  */
3242   if (MULT_COST_LESS (&alg->cost, mult_cost))  /* Same choice of bound as for the negated search.  */
3243     {
3244       limit.cost = alg->cost.cost - op_cost;
3245       limit.latency = alg->cost.latency - op_cost;
3246     }
3247   else
3248     {
3249       limit.cost = mult_cost - op_cost;
3250       limit.latency = mult_cost - op_cost;
3251     }
3252
3253   synth_mult (&alg2, val - 1, &limit, mode);  /* Variation 3: VAL - 1, then add.  */
3254   alg2.cost.cost += op_cost;
3255   alg2.cost.latency += op_cost;
3256   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3257     *alg = alg2, *variant = add_variant;
3258
3259   return MULT_COST_LESS (&alg->cost, mult_cost);
3260 }
3261
3262 /* A subroutine of expand_mult, used for constant multiplications.
3263    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3264    convenient.  Use the shift/add sequence described by ALG and apply
3265    the final fixup specified by VARIANT.  VAL is used only to assert that the emitted sequence computes it.  */
3266
3267 static rtx
3268 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3269                    rtx target, const struct algorithm *alg,
3270                    enum mult_variant variant)
3271 {
3272   unsigned HOST_WIDE_INT val_so_far;  /* Multiple of OP0 that ACCUM holds so far.  */
3273   rtx_insn *insn;
3274   rtx accum, tem;
3275   int opno;
3276   machine_mode nmode;
3277
3278   /* Avoid referencing memory over and over and invalid sharing
3279      on SUBREGs.  */
3280   op0 = force_reg (mode, op0);
3281
3282   /* ACCUM starts out either as OP0 or as a zero, depending on
3283      the first operation.  */
3284
3285   if (alg->op[0] == alg_zero)
3286     {
3287       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3288       val_so_far = 0;
3289     }
3290   else if (alg->op[0] == alg_m)
3291     {
3292       accum = copy_to_mode_reg (mode, op0);
3293       val_so_far = 1;
3294     }
3295   else
3296     gcc_unreachable ();
3297
3298   for (opno = 1; opno < alg->ops; opno++)  /* Step 0 was consumed above.  */
3299     {
3300       int log = alg->log[opno];
3301       rtx shift_subtarget = optimize ? 0 : accum;  /* ACCUM is reused only when not optimizing.  */
3302       rtx add_target
3303         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3304            && !optimize)
3305           ? target : 0;  /* TARGET only for the last step, with no add fixup pending, when not optimizing.  */
3306       rtx accum_target = optimize ? 0 : accum;
3307       rtx accum_inner;
3308
3309       switch (alg->op[opno])
3310         {
3311         case alg_shift:  /* accum <<= log  */
3312           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3313           /* REG_EQUAL note will be attached to the following insn.  */
3314           emit_move_insn (accum, tem);
3315           val_so_far <<= log;
3316           break;
3317
3318         case alg_add_t_m2:  /* accum += op0 << log  */
3319           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3320           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3321                                  add_target ? add_target : accum_target);
3322           val_so_far += HOST_WIDE_INT_1U << log;
3323           break;
3324
3325         case alg_sub_t_m2:  /* accum -= op0 << log  */
3326           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3327           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3328                                  add_target ? add_target : accum_target);
3329           val_so_far -= HOST_WIDE_INT_1U << log;
3330           break;
3331
3332         case alg_add_t2_m:  /* accum = (accum << log) + op0  */
3333           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3334                                 log, shift_subtarget, 0);
3335           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3336                                  add_target ? add_target : accum_target);
3337           val_so_far = (val_so_far << log) + 1;
3338           break;
3339
3340         case alg_sub_t2_m:  /* accum = (accum << log) - op0  */
3341           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3342                                 log, shift_subtarget, 0);
3343           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3344                                  add_target ? add_target : accum_target);
3345           val_so_far = (val_so_far << log) - 1;
3346           break;
3347
3348         case alg_add_factor:  /* accum += accum << log  */
3349           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3350           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3351                                  add_target ? add_target : accum_target);
3352           val_so_far += val_so_far << log;
3353           break;
3354
3355         case alg_sub_factor:  /* accum = (accum << log) - accum  */
3356           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3357           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3358                                  (add_target
3359                                   ? add_target : (optimize ? 0 : tem)));
3360           val_so_far = (val_so_far << log) - val_so_far;
3361           break;
3362
3363         default:
3364           gcc_unreachable ();
3365         }
3366
3367       if (SCALAR_INT_MODE_P (mode))
3368         {
3369           /* Write a REG_EQUAL note on the last insn so that we can cse
3370              multiplication sequences.  Note that if ACCUM is a SUBREG,
3371              we've set the inner register and must properly indicate that.  */
3372           tem = op0, nmode = mode;
3373           accum_inner = accum;
3374           if (GET_CODE (accum) == SUBREG)
3375             {
3376               accum_inner = SUBREG_REG (accum);
3377               nmode = GET_MODE (accum_inner);
3378               tem = gen_lowpart (nmode, op0);
3379             }
3380
3381           /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3382              In that case, only the low bits of accum would be guaranteed to
3383              be equal to the content of the REG_EQUAL note, the upper bits
3384              can be anything.  */
3385           if (!paradoxical_subreg_p (tem))
3386             {
3387               insn = get_last_insn ();
3388               wide_int wval_so_far
3389                 = wi::uhwi (val_so_far,
3390                             GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3391               rtx c = immed_wide_int_const (wval_so_far, nmode);
3392               set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3393                                 accum_inner);  /* The note reads TEM * VAL_SO_FAR.  */
3394             }
3395         }
3396     }
3397
3398   if (variant == negate_variant)  /* The sequence computed OP0 * -VAL; negate it.  */
3399     {
3400       val_so_far = -val_so_far;
3401       accum = expand_unop (mode, neg_optab, accum, target, 0);
3402     }
3403   else if (variant == add_variant)  /* The sequence computed OP0 * (VAL - 1); add OP0.  */
3404     {
3405       val_so_far = val_so_far + 1;
3406       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3407     }
3408
3409   /* Compare only the bits of val and val_so_far that are significant
3410      in the result mode, to avoid sign-/zero-extension confusion.  */
3411   nmode = GET_MODE_INNER (mode);
3412   val &= GET_MODE_MASK (nmode);
3413   val_so_far &= GET_MODE_MASK (nmode);
3414   gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3415
3416   return accum;
3417 }
3418
3419 /* Perform a multiplication and return an rtx for the result.
3420    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3421    TARGET is a suggestion for where to store the result (an rtx).
3422
3423    We check specially for a constant integer as OP1.
3424    If you want this check for OP0 as well, then before calling
3425    you should swap the two operands if OP0 would be constant.  (A CONSTANT_P OP0 is in fact swapped with OP1 on entry below.)  */
3426
3427 rtx
3428 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3429              int unsignedp, bool no_libcall)
3430 {
3431   enum mult_variant variant;
3432   struct algorithm algorithm;
3433   rtx scalar_op1;
3434   int max_cost;
3435   bool speed = optimize_insn_for_speed_p ();
3436   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;  /* Selects the negv/smulv optabs below.  */
3437
3438   if (CONSTANT_P (op0))
3439     std::swap (op0, op1);
3440
3441   /* For vectors, there are several simplifications that can be made if
3442      all elements of the vector constant are identical.  */
3443   scalar_op1 = unwrap_const_vec_duplicate (op1);
3444
3445   if (INTEGRAL_MODE_P (mode))
3446     {
3447       rtx fake_reg;
3448       HOST_WIDE_INT coeff;
3449       bool is_neg;
3450       int mode_bitsize;
3451
3452       if (op1 == CONST0_RTX (mode))
3453         return op1;
3454       if (op1 == CONST1_RTX (mode))
3455         return op0;
3456       if (op1 == CONSTM1_RTX (mode))
3457         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3458                             op0, target, 0);
3459
3460       if (do_trapv)
3461         goto skip_synth;  /* No shift/add sequence is attempted in the trapping case.  */
3462
3463       /* If mode is integer vector mode, check if the backend supports
3464          vector lshift (by scalar or vector) at all.  If not, we can't use
3465          synthesized multiply.  */
3466       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3467           && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3468           && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3469         goto skip_synth;
3470
3471       /* These are the operations that are potentially turned into
3472          a sequence of shifts and additions.  */
3473       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3474
3475       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3476          less than or equal in size to `unsigned int' this doesn't matter.
3477          If the mode is larger than `unsigned int', then synth_mult works
3478          only if the constant value exactly fits in an `unsigned int' without
3479          any truncation.  This means that multiplying by negative values does
3480          not work; results are off by 2^32 on a 32 bit machine.  */
3481       if (CONST_INT_P (scalar_op1))
3482         {
3483           coeff = INTVAL (scalar_op1);
3484           is_neg = coeff < 0;
3485         }
3486 #if TARGET_SUPPORTS_WIDE_INT
3487       else if (CONST_WIDE_INT_P (scalar_op1))
3488 #else
3489       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3490 #endif
3491         {
3492           int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3493           /* Perfect power of 2 (other than 1, which is handled above).  */
3494           if (shift > 0)
3495             return expand_shift (LSHIFT_EXPR, mode, op0,
3496                                  shift, target, unsignedp);
3497           else
3498             goto skip_synth;
3499         }
3500       else
3501         goto skip_synth;
3502
3503       /* We used to test optimize here, on the grounds that it's better to
3504          produce a smaller program when -O is not used.  But this causes
3505          such a terrible slowdown sometimes that it seems better to always
3506          use synth_mult.  */
3507
3508       /* Special case powers of two.  */
3509       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3510           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3511         return expand_shift (LSHIFT_EXPR, mode, op0,
3512                              floor_log2 (coeff), target, unsignedp);
3513
3514       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);  /* Stand-in operand for costing the MULT rtx.  */
3515
3516       /* Attempt to handle multiplication of DImode values by negative
3517          coefficients, by performing the multiplication by a positive
3518          multiplier and then inverting the result.  */
3519       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3520         {
3521           /* It's safe to use -coeff even for INT_MIN, as the
3522              result is interpreted as an unsigned coefficient.
3523              Exclude cost of op0 from max_cost to match the cost
3524              calculation of the synth_mult.  */
3525           coeff = -(unsigned HOST_WIDE_INT) coeff;
3526           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3527                                     mode, speed)
3528                       - neg_cost (speed, mode));  /* Leave room for the final negation.  */
3529           if (max_cost <= 0)
3530             goto skip_synth;
3531
3532           /* Special case powers of two.  */
3533           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3534             {
3535               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3536                                        floor_log2 (coeff), target, unsignedp);
3537               return expand_unop (mode, neg_optab, temp, target, 0);
3538             }
3539
3540           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3541                                    max_cost))
3542             {
3543               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3544                                             &algorithm, variant);
3545               return expand_unop (mode, neg_optab, temp, target, 0);
3546             }
3547           goto skip_synth;
3548         }
3549
3550       /* Exclude cost of op0 from max_cost to match the cost
3551          calculation of the synth_mult.  */
3552       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3553       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3554         return expand_mult_const (mode, op0, coeff, target,
3555                                   &algorithm, variant);
3556     }
3557  skip_synth:
3558
3559   /* Expand x*2.0 as x+x.  */
3560   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3561       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3562     {
3563       op0 = force_reg (GET_MODE (op0), op0);
3564       return expand_binop (mode, add_optab, op0, op0,
3565                            target, unsignedp,
3566                            no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3567     }
3568
3569   /* This used to use umul_optab if unsigned, but for non-widening multiply
3570      there is no difference between signed and unsigned.  */
3571   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3572                       op0, op1, target, unsignedp,
3573                       no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3574   gcc_assert (op0 || no_libcall);  /* Only a NO_LIBCALL expansion may come back null.  */
3575   return op0;
3576 }
3577
3578 /* Return a cost estimate for multiplying a register by the given
3579    COEFFicient in the given MODE and SPEED.  The estimate never exceeds the rtx cost of a register-by-register MULT.  */
3580
3581 int
3582 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3583 {
3584   int max_cost;  /* Cost of the plain MULT rtx; also the search bound.  */
3585   struct algorithm algorithm;
3586   enum mult_variant variant;  /* Filled in by choose_mult_variant but not used here.  */
3587
3588   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);  /* Stand-in operand for costing.  */
3589   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3590                            mode, speed);
3591   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3592     return algorithm.cost.cost;  /* A cheaper shift/add sequence was found.  */
3593   else
3594     return max_cost;
3595 }
3596
3597 /* Perform a widening multiplication and return an rtx for the result.
3598    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3599    TARGET is a suggestion for where to store the result (an rtx).
3600    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3601    or smul_widen_optab.
3602
3603    We check specially for a constant integer as OP1, comparing the
3604    cost of a widening multiply against the cost of a sequence of shifts
3605    and adds.  */
3606
3607 rtx
3608 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3609                       int unsignedp, optab this_optab)
3610 {
3611   bool speed = optimize_insn_for_speed_p ();
3612   rtx cop1;  /* OP1 converted from OP0's mode to MODE.  */
3613
3614   if (CONST_INT_P (op1)
3615       && GET_MODE (op0) != VOIDmode
3616       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3617                                 this_optab == umul_widen_optab))
3618       && CONST_INT_P (cop1)
3619       && (INTVAL (cop1) >= 0
3620           || HWI_COMPUTABLE_MODE_P (mode)))
3621     {
3622       HOST_WIDE_INT coeff = INTVAL (cop1);
3623       int max_cost;
3624       enum mult_variant variant;
3625       struct algorithm algorithm;
3626
3627       if (coeff == 0)
3628         return CONST0_RTX (mode);
3629
3630       /* Special case powers of two.  */
3631       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3632         {
3633           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);  /* Widen OP0 before shifting.  */
3634           return expand_shift (LSHIFT_EXPR, mode, op0,
3635                                floor_log2 (coeff), target, unsignedp);
3636         }
3637
3638       /* Exclude cost of op0 from max_cost to match the cost
3639          calculation of the synth_mult.  */
3640       max_cost = mul_widen_cost (speed, mode);
3641       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3642                                max_cost))
3643         {
3644           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);  /* Widen OP0, then multiply in MODE.  */
3645           return expand_mult_const (mode, op0, coeff, target,
3646                                     &algorithm, variant);
3647         }
3648     }
3649   return expand_binop (mode, this_optab, op0, op1, target,
3650                        unsignedp, OPTAB_LIB_WIDEN);  /* Otherwise use THIS_OPTAB on the unconverted operands.  */
3651 }
3652 \f
3653 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3654    replace division by D, and put the least significant N bits of the result
3655    in *MULTIPLIER_PTR and return the most significant bit.
3656
3657    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3658    needed precision is in PRECISION (should be <= N).
3659
3660    PRECISION should be as small as possible so this function can choose
3661    multiplier more freely.
3662
3663    The rounded-up logarithm of D is placed in *LGUP_PTR.  A shift count that
3664    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3665
3666    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3667    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3668
3669 unsigned HOST_WIDE_INT
3670 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3671                    unsigned HOST_WIDE_INT *multiplier_ptr,
3672                    int *post_shift_ptr, int *lgup_ptr)
3673 {
3674   int lgup, post_shift;
3675   int pow, pow2;
3676
3677   /* lgup = ceil(log2(divisor)); */
3678   lgup = ceil_log2 (d);
3679
3680   gcc_assert (lgup <= n);
3681
3682   pow = n + lgup;  /* Exponent of the power of two divided to get mlow.  */
3683   pow2 = n + lgup - precision;  /* Exponent of the extra term added for mhigh.  */
3684
3685   /* mlow = 2^(N + lgup)/d */
3686   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3687   wide_int mlow = wi::udiv_trunc (val, d);
3688
3689   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3690   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3691   wide_int mhigh = wi::udiv_trunc (val, d);
3692
3693   /* If precision == N, then mlow, mhigh exceed 2^N
3694      (but they do not exceed 2^(N+1)).  */
3695
3696   /* Reduce to lowest terms.  */
3697   for (post_shift = lgup; post_shift > 0; post_shift--)
3698     {
3699       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3700                                                        HOST_BITS_PER_WIDE_INT);  /* MLOW from bit 1 upward.  */
3701       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3702                                                        HOST_BITS_PER_WIDE_INT);  /* MHIGH from bit 1 upward.  */
3703       if (ml_lo >= mh_lo)  /* Stop once the halved bounds are no longer strictly ordered.  */
3704         break;
3705
3706       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3707       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3708     }
3709
3710   *post_shift_ptr = post_shift;
3711   *lgup_ptr = lgup;
3712   if (n < HOST_BITS_PER_WIDE_INT)
3713     {
3714       unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;  /* Low N bits set.  */
3715       *multiplier_ptr = mhigh.to_uhwi () & mask;
3716       return mhigh.to_uhwi () > mask;  /* Nonzero if MHIGH needs more than N bits.  */
3717     }
3718   else
3719     {
3720       *multiplier_ptr = mhigh.to_uhwi ();
3721       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);  /* The single bit just above the low HOST_WIDE_INT.  */
3722     }
3723 }
3724
3725 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3726    congruent to 1 (mod 2**N).  X is assumed to be odd.  */
3727
3728 static unsigned HOST_WIDE_INT
3729 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3730 {
3731   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3732
3733   /* The algorithm notes that the choice y = x satisfies
3734      x*y == 1 mod 2^3, since x is assumed odd.
3735      Each iteration doubles the number of bits of significance in y.  */
3736
3737   unsigned HOST_WIDE_INT mask;
3738   unsigned HOST_WIDE_INT y = x;
3739   int nbit = 3;  /* Number of low bits of Y that are significant so far.  */
3740
3741   mask = (n == HOST_BITS_PER_WIDE_INT
3742           ? HOST_WIDE_INT_M1U
3743           : (HOST_WIDE_INT_1U << n) - 1);  /* Low N bits set; full width is special-cased rather than shifted.  */
3744
3745   while (nbit < n)
3746     {
3747       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3748       nbit *= 2;
3749     }
3750   return y;
3751 }
3752
3753 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3754    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3755    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3756    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3757    become signed.
3758
3759    The result is put in TARGET if that is convenient.
3760
3761    MODE is the mode of operation.  */
3762
3763 rtx
3764 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3765                              rtx op1, rtx target, int unsignedp)
3766 {
3767   rtx tem;
3768   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;  /* Add the corrections for unsigned, subtract them for signed.  */
3769
3770   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3771                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3772   tem = expand_and (mode, tem, op1, NULL_RTX);  /* tem = (op0 >> (bitsize - 1)) & op1  */
3773   adj_operand
3774     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3775                      adj_operand);  /* First correction, kept in ADJ_OPERAND.  */
3776
3777   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3778                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3779   tem = expand_and (mode, tem, op0, NULL_RTX);  /* tem = (op1 >> (bitsize - 1)) & op0  */
3780   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3781                           target);  /* Second correction, placed in TARGET if convenient.  */
3782
3783   return target;
3784 }
3785
3786 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  For a MODE other than word_mode, OP is shifted in the next wider mode.  */
3787
3788 static rtx
3789 extract_high_half (scalar_int_mode mode, rtx op)
3790 {
3791   if (mode == word_mode)
3792     return gen_highpart (mode, op);  /* Word-sized halves are taken directly.  */
3793
3794   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3795
3796   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3797                      GET_MODE_BITSIZE (mode), 0, 1);  /* Shift right by MODE's width in WIDER_MODE.  */
3798   return convert_modes (mode, wider_mode, op, 0);  /* Narrow the shifted value to MODE.  */
3799 }
3800
3801 /* Like expmed_mult_highpart, but only consider using a multiplication
3802    optab.  OP1 is an rtx for the constant operand.  */
3803
3804 static rtx
3805 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3806                             rtx target, int unsignedp, int max_cost)
3807 {
3808   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3809   optab moptab;
3810   rtx tem;
3811   int size;
3812   bool speed = optimize_insn_for_speed_p ();
3813
3814   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3815
3816   size = GET_MODE_BITSIZE (mode);
3817
3818   /* Firstly, try using a multiplication insn that only generates the needed
3819      high part of the product, and in the sign flavor of unsignedp.  */
3820   if (mul_highpart_cost (speed, mode) < max_cost)
3821     {
3822       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3823       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3824                           unsignedp, OPTAB_DIRECT);
3825       if (tem)
3826         return tem;
3827     }
3828
3829   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3830      Need to adjust the result after the multiplication.  */
3831   if (size - 1 < BITS_PER_WORD
3832       && (mul_highpart_cost (speed, mode)
3833           + 2 * shift_cost (speed, mode, size-1)
3834           + 4 * add_cost (speed, mode) < max_cost))
3835     {
3836       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3837       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3838                           unsignedp, OPTAB_DIRECT);
3839       if (tem)
3840         /* We used the wrong signedness.  Adjust the result.  */
3841         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3842                                             tem, unsignedp);
3843     }
3844
3845   /* Try widening multiplication.  */
3846   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3847   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3848       && mul_widen_cost (speed, wider_mode) < max_cost)
3849     {
3850       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3851                           unsignedp, OPTAB_WIDEN);
3852       if (tem)
3853         return extract_high_half (mode, tem);
3854     }
3855
3856   /* Try widening the mode and perform a non-widening multiplication.  */
3857   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3858       && size - 1 < BITS_PER_WORD
3859       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3860           < max_cost))
3861     {
3862       rtx_insn *insns;
3863       rtx wop0, wop1;
3864
3865       /* We need to widen the operands, for example to ensure the
3866          constant multiplier is correctly sign or zero extended.
3867          Use a sequence to clean-up any instructions emitted by
3868          the conversions if things don't work out.  */
3869       start_sequence ();
3870       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3871       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3872       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3873                           unsignedp, OPTAB_WIDEN);
3874       insns = get_insns ();
3875       end_sequence ();
3876
3877       if (tem)
3878         {
3879           emit_insn (insns);
3880           return extract_high_half (mode, tem);
3881         }
3882     }
3883
3884   /* Try widening multiplication of opposite signedness, and adjust.  */
3885   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3886   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3887       && size - 1 < BITS_PER_WORD
3888       && (mul_widen_cost (speed, wider_mode)
3889           + 2 * shift_cost (speed, mode, size-1)
3890           + 4 * add_cost (speed, mode) < max_cost))
3891     {
3892       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3893                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3894       if (tem != 0)
3895         {
3896           tem = extract_high_half (mode, tem);
3897           /* We used the wrong signedness.  Adjust the result.  */
3898           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3899                                               target, unsignedp);
3900         }
3901     }
3902
3903   return 0;
3904 }
3905
3906 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3907    putting the high half of the result in TARGET if that is convenient,
3908    and return where the result is.  If the operation cannot be performed,
3909    0 is returned.
3910
3911    MODE is the mode of operation and result.
3912
3913    UNSIGNEDP nonzero means unsigned multiply.
3914
3915    MAX_COST is the total allowed cost for the expanded RTL.  */
3916
3917 static rtx
3918 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3919                       rtx target, int unsignedp, int max_cost)
3920 {
3921   unsigned HOST_WIDE_INT cnst1;
3922   int extra_cost;
3923   bool sign_adjust = false;
3924   enum mult_variant variant;
3925   struct algorithm alg;
3926   rtx tem;
3927   bool speed = optimize_insn_for_speed_p ();
3928
3929   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3930   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3931
3932   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3933
3934   /* We can't optimize modes wider than BITS_PER_WORD.
3935      ??? We might be able to perform double-word arithmetic if
3936      mode == word_mode, however all the cost calculations in
3937      synth_mult etc. assume single-word operations.  */
3938   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3939   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3940     return expmed_mult_highpart_optab (mode, op0, op1, target,
3941                                        unsignedp, max_cost);
3942
3943   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3944
3945   /* Check whether we try to multiply by a negative constant.  */
3946   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3947     {
3948       sign_adjust = true;
3949       extra_cost += add_cost (speed, mode);
3950     }
3951
3952   /* See whether shift/add multiplication is cheap enough.  */
3953   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3954                            max_cost - extra_cost))
3955     {
3956       /* See whether the specialized multiplication optabs are
3957          cheaper than the shift/add version.  */
3958       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3959                                         alg.cost.cost + extra_cost);
3960       if (tem)
3961         return tem;
3962
3963       tem = convert_to_mode (wider_mode, op0, unsignedp);
3964       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3965       tem = extract_high_half (mode, tem);
3966
3967       /* Adjust result for signedness.  */
3968       if (sign_adjust)
3969         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3970
3971       return tem;
3972     }
3973   return expmed_mult_highpart_optab (mode, op0, op1, target,
3974                                      unsignedp, max_cost);
3975 }
3976
3977
3978 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3979
3980 static rtx
3981 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3982 {
3983   rtx result, temp, shift;
3984   rtx_code_label *label;
3985   int logd;
3986   int prec = GET_MODE_PRECISION (mode);
3987
3988   logd = floor_log2 (d);
3989   result = gen_reg_rtx (mode);
3990
3991   /* Avoid conditional branches when they're expensive.  */
3992   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3993       && optimize_insn_for_speed_p ())
3994     {
3995       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3996                                       mode, 0, -1);
3997       if (signmask)
3998         {
3999           HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
4000           signmask = force_reg (mode, signmask);
4001           shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
4002
4003           /* Use the rtx_cost of a LSHIFTRT instruction to determine
4004              which instruction sequence to use.  If logical right shifts
4005              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
4006              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
4007
4008           temp = gen_rtx_LSHIFTRT (mode, result, shift);
4009           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
4010               || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
4011                   > COSTS_N_INSNS (2)))
4012             {
4013               temp = expand_binop (mode, xor_optab, op0, signmask,
4014                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4015               temp = expand_binop (mode, sub_optab, temp, signmask,
4016                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4017               temp = expand_binop (mode, and_optab, temp,
4018                                    gen_int_mode (masklow, mode),
4019                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4020               temp = expand_binop (mode, xor_optab, temp, signmask,
4021                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4022               temp = expand_binop (mode, sub_optab, temp, signmask,
4023                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4024             }
4025           else
4026             {
4027               signmask = expand_binop (mode, lshr_optab, signmask, shift,
4028                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
4029               signmask = force_reg (mode, signmask);
4030
4031               temp = expand_binop (mode, add_optab, op0, signmask,
4032                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4033               temp = expand_binop (mode, and_optab, temp,
4034                                    gen_int_mode (masklow, mode),
4035                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4036               temp = expand_binop (mode, sub_optab, temp, signmask,
4037                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4038             }
4039           return temp;
4040         }
4041     }
4042
4043   /* Mask contains the mode's signbit and the significant bits of the
4044      modulus.  By including the signbit in the operation, many targets
4045      can avoid an explicit compare operation in the following comparison
4046      against zero.  */
4047   wide_int mask = wi::mask (logd, false, prec);
4048   mask = wi::set_bit (mask, prec - 1);
4049
4050   temp = expand_binop (mode, and_optab, op0,
4051                        immed_wide_int_const (mask, mode),
4052                        result, 1, OPTAB_LIB_WIDEN);
4053   if (temp != result)
4054     emit_move_insn (result, temp);
4055
4056   label = gen_label_rtx ();
4057   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4058
4059   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4060                        0, OPTAB_LIB_WIDEN);
4061
4062   mask = wi::mask (logd, true, prec);
4063   temp = expand_binop (mode, ior_optab, temp,
4064                        immed_wide_int_const (mask, mode),
4065                        result, 1, OPTAB_LIB_WIDEN);
4066   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4067                        0, OPTAB_LIB_WIDEN);
4068   if (temp != result)
4069     emit_move_insn (result, temp);
4070   emit_label (label);
4071   return result;
4072 }
4073
4074 /* Expand signed division of OP0 by a power of two D in mode MODE.
4075    This routine is only called for positive values of D.  */
4076
4077 static rtx
4078 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4079 {
4080   rtx temp;
4081   rtx_code_label *label;
4082   int logd;
4083
4084   logd = floor_log2 (d);
4085
4086   if (d == 2
4087       && BRANCH_COST (optimize_insn_for_speed_p (),
4088                       false) >= 1)
4089     {
4090       temp = gen_reg_rtx (mode);
4091       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4092       if (temp != NULL_RTX)
4093         {
4094           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4095                                0, OPTAB_LIB_WIDEN);
4096           return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4097         }
4098     }
4099
4100   if (HAVE_conditional_move
4101       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4102     {
4103       rtx temp2;
4104
4105       start_sequence ();
4106       temp2 = copy_to_mode_reg (mode, op0);
4107       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4108                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
4109       temp = force_reg (mode, temp);
4110
4111       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
4112       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
4113                                      mode, temp, temp2, mode, 0);
4114       if (temp2)
4115         {
4116           rtx_insn *seq = get_insns ();
4117           end_sequence ();
4118           emit_insn (seq);
4119           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4120         }
4121       end_sequence ();
4122     }
4123
4124   if (BRANCH_COST (optimize_insn_for_speed_p (),
4125                    false) >= 2)
4126     {
4127       int ushift = GET_MODE_BITSIZE (mode) - logd;
4128
4129       temp = gen_reg_rtx (mode);
4130       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4131       if (temp != NULL_RTX)
4132         {
4133           if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4134               || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4135               > COSTS_N_INSNS (1))
4136             temp = expand_binop (mode, and_optab, temp,
4137                                  gen_int_mode (d - 1, mode),
4138                                  NULL_RTX, 0, OPTAB_LIB_WIDEN);
4139           else
4140             temp = expand_shift (RSHIFT_EXPR, mode, temp,
4141                                  ushift, NULL_RTX, 1);
4142           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4143                                0, OPTAB_LIB_WIDEN);
4144           return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4145         }
4146     }
4147
4148   label = gen_label_rtx ();
4149   temp = copy_to_mode_reg (mode, op0);
4150   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4151   expand_inc (temp, gen_int_mode (d - 1, mode));
4152   emit_label (label);
4153   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4154 }
4155 \f
4156 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4157    if that is convenient, and returning where the result is.
4158    You may request either the quotient or the remainder as the result;
4159    specify REM_FLAG nonzero to get the remainder.
4160
4161    CODE is the expression code for which kind of division this is;
4162    it controls how rounding is done.  MODE is the machine mode to use.
4163    UNSIGNEDP nonzero means do unsigned division.  */
4164
4165 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4166    and then correct it by or'ing in missing high bits
4167    if result of ANDI is nonzero.
4168    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4169    This could optimize to a bfexts instruction.
4170    But C doesn't use these operations, so their optimizations are
4171    left for later.  */
4172 /* ??? For modulo, we don't actually need the highpart of the first product,
4173    the low part will do nicely.  And for small divisors, the second multiply
4174    can also be a low-part only multiply or even be completely left out.
4175    E.g. to calculate the remainder of a division by 3 with a 32 bit
4176    multiply, multiply with 0x55555556 and extract the upper two bits;
4177    the result is exact for inputs up to 0x1fffffff.
4178    The input range can be reduced by using cross-sum rules.
4179    For odd divisors >= 3, the following table gives right shift counts
4180    so that if a number is shifted by an integer multiple of the given
4181    amount, the remainder stays the same:
4182    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4183    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4184    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4185    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4186    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4187
4188    Cross-sum rules for even numbers can be derived by leaving as many bits
4189    to the right alone as the divisor has zeros to the right.
4190    E.g. if x is an unsigned 32 bit number:
4191    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4192    */
4193
4194 rtx
4195 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4196                rtx op0, rtx op1, rtx target, int unsignedp)
4197 {
4198   machine_mode compute_mode;
4199   rtx tquotient;
4200   rtx quotient = 0, remainder = 0;
4201   rtx_insn *last;
4202   rtx_insn *insn;
4203   optab optab1, optab2;
4204   int op1_is_constant, op1_is_pow2 = 0;
4205   int max_cost, extra_cost;
4206   static HOST_WIDE_INT last_div_const = 0;
4207   bool speed = optimize_insn_for_speed_p ();
4208
4209   op1_is_constant = CONST_INT_P (op1);
4210   if (op1_is_constant)
4211     {
4212       wide_int ext_op1 = rtx_mode_t (op1, mode);
4213       op1_is_pow2 = (wi::popcount (ext_op1) == 1
4214                      || (! unsignedp
4215                          && wi::popcount (wi::neg (ext_op1)) == 1));
4216     }
4217
4218   /*
4219      This is the structure of expand_divmod:
4220
4221      First comes code to fix up the operands so we can perform the operations
4222      correctly and efficiently.
4223
4224      Second comes a switch statement with code specific for each rounding mode.
4225      For some special operands this code emits all RTL for the desired
4226      operation, for other cases, it generates only a quotient and stores it in
4227      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4228      to indicate that it has not done anything.
4229
4230      Last comes code that finishes the operation.  If QUOTIENT is set and
4231      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4232      QUOTIENT is not set, it is computed using trunc rounding.
4233
4234      We try to generate special code for division and remainder when OP1 is a
4235      constant.  If |OP1| = 2**n we can use shifts and some other fast
4236      operations.  For other values of OP1, we compute a carefully selected
4237      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4238      by m.
4239
4240      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4241      half of the product.  Different strategies for generating the product are
4242      implemented in expmed_mult_highpart.
4243
4244      If what we actually want is the remainder, we generate that by another
4245      by-constant multiplication and a subtraction.  */
4246
4247   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4248      code below will malfunction if we are, so check here and handle
4249      the special case if so.  */
4250   if (op1 == const1_rtx)
4251     return rem_flag ? const0_rtx : op0;
4252
4253     /* When dividing by -1, we could get an overflow.
4254      negv_optab can handle overflows.  */
4255   if (! unsignedp && op1 == constm1_rtx)
4256     {
4257       if (rem_flag)
4258         return const0_rtx;
4259       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4260                           ? negv_optab : neg_optab, op0, target, 0);
4261     }
4262
4263   if (target
4264       /* Don't use the function value register as a target
4265          since we have to read it as well as write it,
4266          and function-inlining gets confused by this.  */
4267       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4268           /* Don't clobber an operand while doing a multi-step calculation.  */
4269           || ((rem_flag || op1_is_constant)
4270               && (reg_mentioned_p (target, op0)
4271                   || (MEM_P (op0) && MEM_P (target))))
4272           || reg_mentioned_p (target, op1)
4273           || (MEM_P (op1) && MEM_P (target))))
4274     target = 0;
4275
4276   /* Get the mode in which to perform this computation.  Normally it will
4277      be MODE, but sometimes we can't do the desired operation in MODE.
4278      If so, pick a wider mode in which we can do the operation.  Convert
4279      to that mode at the start to avoid repeated conversions.
4280
4281      First see what operations we need.  These depend on the expression
4282      we are evaluating.  (We assume that divxx3 insns exist under the
4283      same conditions that modxx3 insns and that these insns don't normally
4284      fail.  If these assumptions are not correct, we may generate less
4285      efficient code in some cases.)
4286
4287      Then see if we find a mode in which we can open-code that operation
4288      (either a division, modulus, or shift).  Finally, check for the smallest
4289      mode for which we can do the operation with a library call.  */
4290
4291   /* We might want to refine this now that we have division-by-constant
4292      optimization.  Since expmed_mult_highpart tries so many variants, it is
4293      not straightforward to generalize this.  Maybe we should make an array
4294      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4295
4296   optab1 = (op1_is_pow2
4297             ? (unsignedp ? lshr_optab : ashr_optab)
4298             : (unsignedp ? udiv_optab : sdiv_optab));
4299   optab2 = (op1_is_pow2 ? optab1
4300             : (unsignedp ? udivmod_optab : sdivmod_optab));
4301
4302   FOR_EACH_MODE_FROM (compute_mode, mode)
4303     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4304         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4305       break;
4306
4307   if (compute_mode == VOIDmode)
4308     FOR_EACH_MODE_FROM (compute_mode, mode)
4309       if (optab_libfunc (optab1, compute_mode)
4310           || optab_libfunc (optab2, compute_mode))
4311         break;
4312
4313   /* If we still couldn't find a mode, use MODE, but expand_binop will
4314      probably die.  */
4315   if (compute_mode == VOIDmode)
4316     compute_mode = mode;
4317
4318   if (target && GET_MODE (target) == compute_mode)
4319     tquotient = target;
4320   else
4321     tquotient = gen_reg_rtx (compute_mode);
4322
4323 #if 0
4324   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4325      (mode), and thereby get better code when OP1 is a constant.  Do that
4326      later.  It will require going over all usages of SIZE below.  */
4327   size = GET_MODE_BITSIZE (mode);
4328 #endif
4329
4330   /* Only deduct something for a REM if the last divide done was
4331      for a different constant.   Then set the constant of the last
4332      divide.  */
4333   max_cost = (unsignedp
4334               ? udiv_cost (speed, compute_mode)
4335               : sdiv_cost (speed, compute_mode));
4336   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4337                      && INTVAL (op1) == last_div_const))
4338     max_cost -= (mul_cost (speed, compute_mode)
4339                  + add_cost (speed, compute_mode));
4340
4341   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4342
4343   /* Now convert to the best mode to use.  */
4344   if (compute_mode != mode)
4345     {
4346       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4347       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4348
4349       /* convert_modes may have placed op1 into a register, so we
4350          must recompute the following.  */
4351       op1_is_constant = CONST_INT_P (op1);
4352       if (op1_is_constant)
4353         {
4354           wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4355           op1_is_pow2 = (wi::popcount (ext_op1) == 1
4356                          || (! unsignedp
4357                              && wi::popcount (wi::neg (ext_op1)) == 1));
4358         }
4359       else
4360         op1_is_pow2 = 0;
4361     }
4362
4363   /* If one of the operands is a volatile MEM, copy it into a register.  */
4364
4365   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4366     op0 = force_reg (compute_mode, op0);
4367   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4368     op1 = force_reg (compute_mode, op1);
4369
4370   /* If we need the remainder or if OP1 is constant, we need to
4371      put OP0 in a register in case it has any queued subexpressions.  */
4372   if (rem_flag || op1_is_constant)
4373     op0 = force_reg (compute_mode, op0);
4374
4375   last = get_last_insn ();
4376
4377   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4378   if (unsignedp)
4379     {
4380       if (code == FLOOR_DIV_EXPR)
4381         code = TRUNC_DIV_EXPR;
4382       if (code == FLOOR_MOD_EXPR)
4383         code = TRUNC_MOD_EXPR;
4384       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4385         code = TRUNC_DIV_EXPR;
4386     }
4387
4388   if (op1 != const0_rtx)
4389     switch (code)
4390       {
4391       case TRUNC_MOD_EXPR:
4392       case TRUNC_DIV_EXPR:
4393         if (op1_is_constant)
4394           {
4395             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4396             int size = GET_MODE_BITSIZE (int_mode);
4397             if (unsignedp)
4398               {
4399                 unsigned HOST_WIDE_INT mh, ml;
4400                 int pre_shift, post_shift;
4401                 int dummy;
4402                 wide_int wd = rtx_mode_t (op1, int_mode);
4403                 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4404
4405                 if (wi::popcount (wd) == 1)
4406                   {
4407                     pre_shift = floor_log2 (d);
4408                     if (rem_flag)
4409                       {
4410                         unsigned HOST_WIDE_INT mask
4411                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4412                         remainder
4413                           = expand_binop (int_mode, and_optab, op0,
4414                                           gen_int_mode (mask, int_mode),
4415                                           remainder, 1,
4416                                           OPTAB_LIB_WIDEN);
4417                         if (remainder)
4418                           return gen_lowpart (mode, remainder);
4419                       }
4420                     quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4421                                              pre_shift, tquotient, 1);
4422                   }
4423                 else if (size <= HOST_BITS_PER_WIDE_INT)
4424                   {
4425                     if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4426                       {
4427                         /* Most significant bit of divisor is set; emit an scc
4428                            insn.  */
4429                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4430                                                           int_mode, 1, 1);
4431                       }
4432                     else
4433                       {
4434                         /* Find a suitable multiplier and right shift count
4435                            instead of multiplying with D.  */
4436
4437                         mh = choose_multiplier (d, size, size,
4438                                                 &ml, &post_shift, &dummy);
4439
4440                         /* If the suggested multiplier is more than SIZE bits,
4441                            we can do better for even divisors, using an
4442                            initial right shift.  */
4443                         if (mh != 0 && (d & 1) == 0)
4444                           {
4445                             pre_shift = ctz_or_zero (d);
4446                             mh = choose_multiplier (d >> pre_shift, size,
4447                                                     size - pre_shift,
4448                                                     &ml, &post_shift, &dummy);
4449                             gcc_assert (!mh);
4450                           }
4451                         else
4452                           pre_shift = 0;
4453
4454                         if (mh != 0)
4455                           {
4456                             rtx t1, t2, t3, t4;
4457
4458                             if (post_shift - 1 >= BITS_PER_WORD)
4459                               goto fail1;
4460
4461                             extra_cost
4462                               = (shift_cost (speed, int_mode, post_shift - 1)
4463                                  + shift_cost (speed, int_mode, 1)
4464                                  + 2 * add_cost (speed, int_mode));
4465                             t1 = expmed_mult_highpart
4466                               (int_mode, op0, gen_int_mode (ml, int_mode),
4467                                NULL_RTX, 1, max_cost - extra_cost);
4468                             if (t1 == 0)
4469                               goto fail1;
4470                             t2 = force_operand (gen_rtx_MINUS (int_mode,
4471                                                                op0, t1),
4472                                                 NULL_RTX);
4473                             t3 = expand_shift (RSHIFT_EXPR, int_mode,
4474                                                t2, 1, NULL_RTX, 1);
4475                             t4 = force_operand (gen_rtx_PLUS (int_mode,
4476                                                               t1, t3),
4477                                                 NULL_RTX);
4478                             quotient = expand_shift
4479                               (RSHIFT_EXPR, int_mode, t4,
4480                                post_shift - 1, tquotient, 1);
4481                           }
4482                         else
4483                           {
4484                             rtx t1, t2;
4485
4486                             if (pre_shift >= BITS_PER_WORD
4487                                 || post_shift >= BITS_PER_WORD)
4488                               goto fail1;
4489
4490                             t1 = expand_shift
4491                               (RSHIFT_EXPR, int_mode, op0,
4492                                pre_shift, NULL_RTX, 1);
4493                             extra_cost
4494                               = (shift_cost (speed, int_mode, pre_shift)
4495                                  + shift_cost (speed, int_mode, post_shift));
4496                             t2 = expmed_mult_highpart
4497                               (int_mode, t1,
4498                                gen_int_mode (ml, int_mode),
4499                                NULL_RTX, 1, max_cost - extra_cost);
4500                             if (t2 == 0)
4501                               goto fail1;
4502                             quotient = expand_shift
4503                               (RSHIFT_EXPR, int_mode, t2,
4504                                post_shift, tquotient, 1);
4505                           }
4506                       }
4507                   }
4508                 else            /* Too wide mode to use tricky code */
4509                   break;
4510
4511                 insn = get_last_insn ();
4512                 if (insn != last)
4513                   set_dst_reg_note (insn, REG_EQUAL,
4514                                     gen_rtx_UDIV (int_mode, op0, op1),
4515                                     quotient);
4516               }
4517             else                /* TRUNC_DIV, signed */
4518               {
4519                 unsigned HOST_WIDE_INT ml;
4520                 int lgup, post_shift;
4521                 rtx mlr;
4522                 HOST_WIDE_INT d = INTVAL (op1);
4523                 unsigned HOST_WIDE_INT abs_d;
4524
4525                 /* Not prepared to handle division/remainder by
4526                    0xffffffffffffffff8000000000000000 etc.  */
4527                 if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4528                   break;
4529
4530                 /* Since d might be INT_MIN, we have to cast to
4531                    unsigned HOST_WIDE_INT before negating to avoid
4532                    undefined signed overflow.  */
4533                 abs_d = (d >= 0
4534                          ? (unsigned HOST_WIDE_INT) d
4535                          : - (unsigned HOST_WIDE_INT) d);
4536
4537                 /* n rem d = n rem -d */
4538                 if (rem_flag && d < 0)
4539                   {
4540                     d = abs_d;
4541                     op1 = gen_int_mode (abs_d, int_mode);
4542                   }
4543
4544                 if (d == 1)
4545                   quotient = op0;
4546                 else if (d == -1)
4547                   quotient = expand_unop (int_mode, neg_optab, op0,
4548                                           tquotient, 0);
4549                 else if (size <= HOST_BITS_PER_WIDE_INT
4550                          && abs_d == HOST_WIDE_INT_1U << (size - 1))
4551                   {
4552                     /* This case is not handled correctly below.  */
4553                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4554                                                 int_mode, 1, 1);
4555                     if (quotient == 0)
4556                       goto fail1;
4557                   }
4558                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4559                          && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4560                          && (rem_flag
4561                              ? smod_pow2_cheap (speed, int_mode)
4562                              : sdiv_pow2_cheap (speed, int_mode))
4563                          /* We assume that cheap metric is true if the
4564                             optab has an expander for this mode.  */
4565                          && ((optab_handler ((rem_flag ? smod_optab
4566                                               : sdiv_optab),
4567                                              int_mode)
4568                               != CODE_FOR_nothing)
4569                              || (optab_handler (sdivmod_optab, int_mode)
4570                                  != CODE_FOR_nothing)))
4571                   ;
4572                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4573                   {
4574                     if (rem_flag)
4575                       {
4576                         remainder = expand_smod_pow2 (int_mode, op0, d);
4577                         if (remainder)
4578                           return gen_lowpart (mode, remainder);
4579                       }
4580
4581                     if (sdiv_pow2_cheap (speed, int_mode)
4582                         && ((optab_handler (sdiv_optab, int_mode)
4583                              != CODE_FOR_nothing)
4584                             || (optab_handler (sdivmod_optab, int_mode)
4585                                 != CODE_FOR_nothing)))
4586                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4587                                                 int_mode, op0,
4588                                                 gen_int_mode (abs_d,
4589                                                               int_mode),
4590                                                 NULL_RTX, 0);
4591                     else
4592                       quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4593
4594                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4595                        negate the quotient.  */
4596                     if (d < 0)
4597                       {
4598                         insn = get_last_insn ();
4599                         if (insn != last
4600                             && abs_d < (HOST_WIDE_INT_1U
4601                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4602                           set_dst_reg_note (insn, REG_EQUAL,
4603                                             gen_rtx_DIV (int_mode, op0,
4604                                                          gen_int_mode
4605                                                            (abs_d,
4606                                                             int_mode)),
4607                                             quotient);
4608
4609                         quotient = expand_unop (int_mode, neg_optab,
4610                                                 quotient, quotient, 0);
4611                       }
4612                   }
4613                 else if (size <= HOST_BITS_PER_WIDE_INT)
4614                   {
4615                     choose_multiplier (abs_d, size, size - 1,
4616                                        &ml, &post_shift, &lgup);
4617                     if (ml < HOST_WIDE_INT_1U << (size - 1))
4618                       {
4619                         rtx t1, t2, t3;
4620
4621                         if (post_shift >= BITS_PER_WORD
4622                             || size - 1 >= BITS_PER_WORD)
4623                           goto fail1;
4624
4625                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4626                                       + shift_cost (speed, int_mode, size - 1)
4627                                       + add_cost (speed, int_mode));
4628                         t1 = expmed_mult_highpart
4629                           (int_mode, op0, gen_int_mode (ml, int_mode),
4630                            NULL_RTX, 0, max_cost - extra_cost);
4631                         if (t1 == 0)
4632                           goto fail1;
4633                         t2 = expand_shift
4634                           (RSHIFT_EXPR, int_mode, t1,
4635                            post_shift, NULL_RTX, 0);
4636                         t3 = expand_shift
4637                           (RSHIFT_EXPR, int_mode, op0,
4638                            size - 1, NULL_RTX, 0);
4639                         if (d < 0)
4640                           quotient
4641                             = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4642                                              tquotient);
4643                         else
4644                           quotient
4645                             = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4646                                              tquotient);
4647                       }
4648                     else
4649                       {
4650                         rtx t1, t2, t3, t4;
4651
4652                         if (post_shift >= BITS_PER_WORD
4653                             || size - 1 >= BITS_PER_WORD)
4654                           goto fail1;
4655
4656                         ml |= HOST_WIDE_INT_M1U << (size - 1);
4657                         mlr = gen_int_mode (ml, int_mode);
4658                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4659                                       + shift_cost (speed, int_mode, size - 1)
4660                                       + 2 * add_cost (speed, int_mode));
4661                         t1 = expmed_mult_highpart (int_mode, op0, mlr,
4662                                                    NULL_RTX, 0,
4663                                                    max_cost - extra_cost);
4664                         if (t1 == 0)
4665                           goto fail1;
4666                         t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4667                                             NULL_RTX);
4668                         t3 = expand_shift
4669                           (RSHIFT_EXPR, int_mode, t2,
4670                            post_shift, NULL_RTX, 0);
4671                         t4 = expand_shift
4672                           (RSHIFT_EXPR, int_mode, op0,
4673                            size - 1, NULL_RTX, 0);
4674                         if (d < 0)
4675                           quotient
4676                             = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4677                                              tquotient);
4678                         else
4679                           quotient
4680                             = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4681                                              tquotient);
4682                       }
4683                   }
4684                 else            /* Too wide mode to use tricky code */
4685                   break;
4686
4687                 insn = get_last_insn ();
4688                 if (insn != last)
4689                   set_dst_reg_note (insn, REG_EQUAL,
4690                                     gen_rtx_DIV (int_mode, op0, op1),
4691                                     quotient);
4692               }
4693             break;
4694           }
4695       fail1:
4696         delete_insns_since (last);
4697         break;
4698
4699       case FLOOR_DIV_EXPR:
4700       case FLOOR_MOD_EXPR:
4701       /* We will come here only for signed operations.  */
4702         if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4703           {
4704             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4705             int size = GET_MODE_BITSIZE (int_mode);
4706             unsigned HOST_WIDE_INT mh, ml;
4707             int pre_shift, lgup, post_shift;
4708             HOST_WIDE_INT d = INTVAL (op1);
4709
4710             if (d > 0)
4711               {
4712                 /* We could just as easily deal with negative constants here,
4713                    but it does not seem worth the trouble for GCC 2.6.  */
4714                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4715                   {
4716                     pre_shift = floor_log2 (d);
4717                     if (rem_flag)
4718                       {
4719                         unsigned HOST_WIDE_INT mask
4720                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4721                         remainder = expand_binop
4722                           (int_mode, and_optab, op0,
4723                            gen_int_mode (mask, int_mode),
4724                            remainder, 0, OPTAB_LIB_WIDEN);
4725                         if (remainder)
4726                           return gen_lowpart (mode, remainder);
4727                       }
4728                     quotient = expand_shift
4729                       (RSHIFT_EXPR, int_mode, op0,
4730                        pre_shift, tquotient, 0);
4731                   }
4732                 else
4733                   {
4734                     rtx t1, t2, t3, t4;
4735
4736                     mh = choose_multiplier (d, size, size - 1,
4737                                             &ml, &post_shift, &lgup);
4738                     gcc_assert (!mh);
4739
4740                     if (post_shift < BITS_PER_WORD
4741                         && size - 1 < BITS_PER_WORD)
4742                       {
4743                         t1 = expand_shift
4744                           (RSHIFT_EXPR, int_mode, op0,
4745                            size - 1, NULL_RTX, 0);
4746                         t2 = expand_binop (int_mode, xor_optab, op0, t1,
4747                                            NULL_RTX, 0, OPTAB_WIDEN);
4748                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4749                                       + shift_cost (speed, int_mode, size - 1)
4750                                       + 2 * add_cost (speed, int_mode));
4751                         t3 = expmed_mult_highpart
4752                           (int_mode, t2, gen_int_mode (ml, int_mode),
4753                            NULL_RTX, 1, max_cost - extra_cost);
4754                         if (t3 != 0)
4755                           {
4756                             t4 = expand_shift
4757                               (RSHIFT_EXPR, int_mode, t3,
4758                                post_shift, NULL_RTX, 1);
4759                             quotient = expand_binop (int_mode, xor_optab,
4760                                                      t4, t1, tquotient, 0,
4761                                                      OPTAB_WIDEN);
4762                           }
4763                       }
4764                   }
4765               }
4766             else
4767               {
4768                 rtx nsign, t1, t2, t3, t4;
4769                 t1 = force_operand (gen_rtx_PLUS (int_mode,
4770                                                   op0, constm1_rtx), NULL_RTX);
4771                 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4772                                    0, OPTAB_WIDEN);
4773                 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4774                                       size - 1, NULL_RTX, 0);
4775                 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4776                                     NULL_RTX);
4777                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4778                                     NULL_RTX, 0);
4779                 if (t4)
4780                   {
4781                     rtx t5;
4782                     t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4783                                       NULL_RTX, 0);
4784                     quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4785                                               tquotient);
4786                   }
4787               }
4788           }
4789
4790         if (quotient != 0)
4791           break;
4792         delete_insns_since (last);
4793
4794         /* Try using an instruction that produces both the quotient and
4795            remainder, using truncation.  We can easily compensate the quotient
4796            or remainder to get floor rounding, once we have the remainder.
4797            Notice that we compute also the final remainder value here,
4798            and return the result right away.  */
4799         if (target == 0 || GET_MODE (target) != compute_mode)
4800           target = gen_reg_rtx (compute_mode);
4801
4802         if (rem_flag)
4803           {
4804             remainder
4805               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4806             quotient = gen_reg_rtx (compute_mode);
4807           }
4808         else
4809           {
4810             quotient
4811               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4812             remainder = gen_reg_rtx (compute_mode);
4813           }
4814
4815         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4816                                  quotient, remainder, 0))
4817           {
4818             /* This could be computed with a branch-less sequence.
4819                Save that for later.  */
4820             rtx tem;
4821             rtx_code_label *label = gen_label_rtx ();
4822             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4823             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4824                                 NULL_RTX, 0, OPTAB_WIDEN);
4825             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4826             expand_dec (quotient, const1_rtx);
4827             expand_inc (remainder, op1);
4828             emit_label (label);
4829             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4830           }
4831
4832         /* No luck with division elimination or divmod.  Have to do it
4833            by conditionally adjusting op0 *and* the result.  */
4834         {
4835           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4836           rtx adjusted_op0;
4837           rtx tem;
4838
4839           quotient = gen_reg_rtx (compute_mode);
4840           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4841           label1 = gen_label_rtx ();
4842           label2 = gen_label_rtx ();
4843           label3 = gen_label_rtx ();
4844           label4 = gen_label_rtx ();
4845           label5 = gen_label_rtx ();
4846           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4847           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4848           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4849                               quotient, 0, OPTAB_LIB_WIDEN);
4850           if (tem != quotient)
4851             emit_move_insn (quotient, tem);
4852           emit_jump_insn (targetm.gen_jump (label5));
4853           emit_barrier ();
4854           emit_label (label1);
4855           expand_inc (adjusted_op0, const1_rtx);
4856           emit_jump_insn (targetm.gen_jump (label4));
4857           emit_barrier ();
4858           emit_label (label2);
4859           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4860           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4861                               quotient, 0, OPTAB_LIB_WIDEN);
4862           if (tem != quotient)
4863             emit_move_insn (quotient, tem);
4864           emit_jump_insn (targetm.gen_jump (label5));
4865           emit_barrier ();
4866           emit_label (label3);
4867           expand_dec (adjusted_op0, const1_rtx);
4868           emit_label (label4);
4869           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4870                               quotient, 0, OPTAB_LIB_WIDEN);
4871           if (tem != quotient)
4872             emit_move_insn (quotient, tem);
4873           expand_dec (quotient, const1_rtx);
4874           emit_label (label5);
4875         }
4876         break;
4877
4878       case CEIL_DIV_EXPR:
4879       case CEIL_MOD_EXPR:
4880         if (unsignedp)
4881           {
4882             if (op1_is_constant
4883                 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4884                 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4885                     || INTVAL (op1) >= 0))
4886               {
4887                 scalar_int_mode int_mode
4888                   = as_a <scalar_int_mode> (compute_mode);
4889                 rtx t1, t2, t3;
4890                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4891                 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4892                                    floor_log2 (d), tquotient, 1);
4893                 t2 = expand_binop (int_mode, and_optab, op0,
4894                                    gen_int_mode (d - 1, int_mode),
4895                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4896                 t3 = gen_reg_rtx (int_mode);
4897                 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4898                 if (t3 == 0)
4899                   {
4900                     rtx_code_label *lab;
4901                     lab = gen_label_rtx ();
4902                     do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4903                     expand_inc (t1, const1_rtx);
4904                     emit_label (lab);
4905                     quotient = t1;
4906                   }
4907                 else
4908                   quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4909                                             tquotient);
4910                 break;
4911               }
4912
4913             /* Try using an instruction that produces both the quotient and
4914                remainder, using truncation.  We can easily compensate the
4915                quotient or remainder to get ceiling rounding, once we have the
4916                remainder.  Notice that we compute also the final remainder
4917                value here, and return the result right away.  */
4918             if (target == 0 || GET_MODE (target) != compute_mode)
4919               target = gen_reg_rtx (compute_mode);
4920
4921             if (rem_flag)
4922               {
4923                 remainder = (REG_P (target)
4924                              ? target : gen_reg_rtx (compute_mode));
4925                 quotient = gen_reg_rtx (compute_mode);
4926               }
4927             else
4928               {
4929                 quotient = (REG_P (target)
4930                             ? target : gen_reg_rtx (compute_mode));
4931                 remainder = gen_reg_rtx (compute_mode);
4932               }
4933
4934             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4935                                      remainder, 1))
4936               {
4937                 /* This could be computed with a branch-less sequence.
4938                    Save that for later.  */
4939                 rtx_code_label *label = gen_label_rtx ();
4940                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4941                                  compute_mode, label);
4942                 expand_inc (quotient, const1_rtx);
4943                 expand_dec (remainder, op1);
4944                 emit_label (label);
4945                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4946               }
4947
4948             /* No luck with division elimination or divmod.  Have to do it
4949                by conditionally adjusting op0 *and* the result.  */
4950             {
4951               rtx_code_label *label1, *label2;
4952               rtx adjusted_op0, tem;
4953
4954               quotient = gen_reg_rtx (compute_mode);
4955               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4956               label1 = gen_label_rtx ();
4957               label2 = gen_label_rtx ();
4958               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4959                                compute_mode, label1);
4960               emit_move_insn  (quotient, const0_rtx);
4961               emit_jump_insn (targetm.gen_jump (label2));
4962               emit_barrier ();
4963               emit_label (label1);
4964               expand_dec (adjusted_op0, const1_rtx);
4965               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4966                                   quotient, 1, OPTAB_LIB_WIDEN);
4967               if (tem != quotient)
4968                 emit_move_insn (quotient, tem);
4969               expand_inc (quotient, const1_rtx);
4970               emit_label (label2);
4971             }
4972           }
4973         else /* signed */
4974           {
4975             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4976                 && INTVAL (op1) >= 0)
4977               {
4978                 /* This is extremely similar to the code for the unsigned case
4979                    above.  For 2.7 we should merge these variants, but for
4980                    2.6.1 I don't want to touch the code for unsigned since that
4981                    gets used in C.  The signed case will only be used by other
4982                    languages (Ada).  */
4983
4984                 rtx t1, t2, t3;
4985                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4986                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4987                                    floor_log2 (d), tquotient, 0);
4988                 t2 = expand_binop (compute_mode, and_optab, op0,
4989                                    gen_int_mode (d - 1, compute_mode),
4990                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4991                 t3 = gen_reg_rtx (compute_mode);
4992                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4993                                       compute_mode, 1, 1);
4994                 if (t3 == 0)
4995                   {
4996                     rtx_code_label *lab;
4997                     lab = gen_label_rtx ();
4998                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4999                     expand_inc (t1, const1_rtx);
5000                     emit_label (lab);
5001                     quotient = t1;
5002                   }
5003                 else
5004                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
5005                                                           t1, t3),
5006                                             tquotient);
5007                 break;
5008               }
5009
5010             /* Try using an instruction that produces both the quotient and
5011                remainder, using truncation.  We can easily compensate the
5012                quotient or remainder to get ceiling rounding, once we have the
5013                remainder.  Notice that we compute also the final remainder
5014                value here, and return the result right away.  */
5015             if (target == 0 || GET_MODE (target) != compute_mode)
5016               target = gen_reg_rtx (compute_mode);
5017             if (rem_flag)
5018               {
5019                 remainder= (REG_P (target)
5020                             ? target : gen_reg_rtx (compute_mode));
5021                 quotient = gen_reg_rtx (compute_mode);
5022               }
5023             else
5024               {
5025                 quotient = (REG_P (target)
5026                             ? target : gen_reg_rtx (compute_mode));
5027                 remainder = gen_reg_rtx (compute_mode);
5028               }
5029
5030             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5031                                      remainder, 0))
5032               {
5033                 /* This could be computed with a branch-less sequence.
5034                    Save that for later.  */
5035                 rtx tem;
5036                 rtx_code_label *label = gen_label_rtx ();
5037                 do_cmp_and_jump (remainder, const0_rtx, EQ,
5038                                  compute_mode, label);
5039                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
5040                                     NULL_RTX, 0, OPTAB_WIDEN);
5041                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5042                 expand_inc (quotient, const1_rtx);
5043                 expand_dec (remainder, op1);
5044                 emit_label (label);
5045                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5046               }
5047
5048             /* No luck with division elimination or divmod.  Have to do it
5049                by conditionally adjusting op0 *and* the result.  */
5050             {
5051               rtx_code_label *label1, *label2, *label3, *label4, *label5;
5052               rtx adjusted_op0;
5053               rtx tem;
5054
5055               quotient = gen_reg_rtx (compute_mode);
5056               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5057               label1 = gen_label_rtx ();
5058               label2 = gen_label_rtx ();
5059               label3 = gen_label_rtx ();
5060               label4 = gen_label_rtx ();
5061               label5 = gen_label_rtx ();
5062               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5063               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5064                                compute_mode, label1);
5065               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5066                                   quotient, 0, OPTAB_LIB_WIDEN);
5067               if (tem != quotient)
5068                 emit_move_insn (quotient, tem);
5069               emit_jump_insn (targetm.gen_jump (label5));
5070               emit_barrier ();
5071               emit_label (label1);
5072               expand_dec (adjusted_op0, const1_rtx);
5073               emit_jump_insn (targetm.gen_jump (label4));
5074               emit_barrier ();
5075               emit_label (label2);
5076               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5077                                compute_mode, label3);
5078               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5079                                   quotient, 0, OPTAB_LIB_WIDEN);
5080               if (tem != quotient)
5081                 emit_move_insn (quotient, tem);
5082               emit_jump_insn (targetm.gen_jump (label5));
5083               emit_barrier ();
5084               emit_label (label3);
5085               expand_inc (adjusted_op0, const1_rtx);
5086               emit_label (label4);
5087               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5088                                   quotient, 0, OPTAB_LIB_WIDEN);
5089               if (tem != quotient)
5090                 emit_move_insn (quotient, tem);
5091               expand_inc (quotient, const1_rtx);
5092               emit_label (label5);
5093             }
5094           }
5095         break;
5096
5097       case EXACT_DIV_EXPR:
5098         if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5099           {
5100             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5101             int size = GET_MODE_BITSIZE (int_mode);
5102             HOST_WIDE_INT d = INTVAL (op1);
5103             unsigned HOST_WIDE_INT ml;
5104             int pre_shift;
5105             rtx t1;
5106
5107             pre_shift = ctz_or_zero (d);
5108             ml = invert_mod2n (d >> pre_shift, size);
5109             t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5110                                pre_shift, NULL_RTX, unsignedp);
5111             quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5112                                     NULL_RTX, 1);
5113
5114             insn = get_last_insn ();
5115             set_dst_reg_note (insn, REG_EQUAL,
5116                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5117                                               int_mode, op0, op1),
5118                               quotient);
5119           }
5120         break;
5121
5122       case ROUND_DIV_EXPR:
5123       case ROUND_MOD_EXPR:
5124         if (unsignedp)
5125           {
5126             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5127             rtx tem;
5128             rtx_code_label *label;
5129             label = gen_label_rtx ();
5130             quotient = gen_reg_rtx (int_mode);
5131             remainder = gen_reg_rtx (int_mode);
5132             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5133               {
5134                 rtx tem;
5135                 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5136                                          quotient, 1, OPTAB_LIB_WIDEN);
5137                 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5138                 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5139                                           remainder, 1, OPTAB_LIB_WIDEN);
5140               }
5141             tem = plus_constant (int_mode, op1, -1);
5142             tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5143             do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5144             expand_inc (quotient, const1_rtx);
5145             expand_dec (remainder, op1);
5146             emit_label (label);
5147           }
5148         else
5149           {
5150             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5151             int size = GET_MODE_BITSIZE (int_mode);
5152             rtx abs_rem, abs_op1, tem, mask;
5153             rtx_code_label *label;
5154             label = gen_label_rtx ();
5155             quotient = gen_reg_rtx (int_mode);
5156             remainder = gen_reg_rtx (int_mode);
5157             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5158               {
5159                 rtx tem;
5160                 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5161                                          quotient, 0, OPTAB_LIB_WIDEN);
5162                 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5163                 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5164                                           remainder, 0, OPTAB_LIB_WIDEN);
5165               }
5166             abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5167             abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5168             tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5169                                 1, NULL_RTX, 1);
5170             do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5171             tem = expand_binop (int_mode, xor_optab, op0, op1,
5172                                 NULL_RTX, 0, OPTAB_WIDEN);
5173             mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5174                                  size - 1, NULL_RTX, 0);
5175             tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5176                                 NULL_RTX, 0, OPTAB_WIDEN);
5177             tem = expand_binop (int_mode, sub_optab, tem, mask,
5178                                 NULL_RTX, 0, OPTAB_WIDEN);
5179             expand_inc (quotient, tem);
5180             tem = expand_binop (int_mode, xor_optab, mask, op1,
5181                                 NULL_RTX, 0, OPTAB_WIDEN);
5182             tem = expand_binop (int_mode, sub_optab, tem, mask,
5183                                 NULL_RTX, 0, OPTAB_WIDEN);
5184             expand_dec (remainder, tem);
5185             emit_label (label);
5186           }
5187         return gen_lowpart (mode, rem_flag ? remainder : quotient);
5188
5189       default:
5190         gcc_unreachable ();
5191       }
5192
5193   if (quotient == 0)
5194     {
5195       if (target && GET_MODE (target) != compute_mode)
5196         target = 0;
5197
5198       if (rem_flag)
5199         {
5200           /* Try to produce the remainder without producing the quotient.
5201              If we seem to have a divmod pattern that does not require widening,
5202              don't try widening here.  We should really have a WIDEN argument
5203              to expand_twoval_binop, since what we'd really like to do here is
5204              1) try a mod insn in compute_mode
5205              2) try a divmod insn in compute_mode
5206              3) try a div insn in compute_mode and multiply-subtract to get
5207                 remainder
5208              4) try the same things with widening allowed.  */
5209           remainder
5210             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5211                                  op0, op1, target,
5212                                  unsignedp,
5213                                  ((optab_handler (optab2, compute_mode)
5214                                    != CODE_FOR_nothing)
5215                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
5216           if (remainder == 0)
5217             {
5218               /* No luck there.  Can we do remainder and divide at once
5219                  without a library call?  */
5220               remainder = gen_reg_rtx (compute_mode);
5221               if (! expand_twoval_binop ((unsignedp
5222                                           ? udivmod_optab
5223                                           : sdivmod_optab),
5224                                          op0, op1,
5225                                          NULL_RTX, remainder, unsignedp))
5226                 remainder = 0;
5227             }
5228
5229           if (remainder)
5230             return gen_lowpart (mode, remainder);
5231         }
5232
5233       /* Produce the quotient.  Try a quotient insn, but not a library call.
5234          If we have a divmod in this mode, use it in preference to widening
5235          the div (for this test we assume it will not fail). Note that optab2
5236          is set to the one of the two optabs that the call below will use.  */
5237       quotient
5238         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5239                              op0, op1, rem_flag ? NULL_RTX : target,
5240                              unsignedp,
5241                              ((optab_handler (optab2, compute_mode)
5242                                != CODE_FOR_nothing)
5243                               ? OPTAB_DIRECT : OPTAB_WIDEN));
5244
5245       if (quotient == 0)
5246         {
5247           /* No luck there.  Try a quotient-and-remainder insn,
5248              keeping the quotient alone.  */
5249           quotient = gen_reg_rtx (compute_mode);
5250           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5251                                      op0, op1,
5252                                      quotient, NULL_RTX, unsignedp))
5253             {
5254               quotient = 0;
5255               if (! rem_flag)
5256                 /* Still no luck.  If we are not computing the remainder,
5257                    use a library call for the quotient.  */
5258                 quotient = sign_expand_binop (compute_mode,
5259                                               udiv_optab, sdiv_optab,
5260                                               op0, op1, target,
5261                                               unsignedp, OPTAB_LIB_WIDEN);
5262             }
5263         }
5264     }
5265
5266   if (rem_flag)
5267     {
5268       if (target && GET_MODE (target) != compute_mode)
5269         target = 0;
5270
5271       if (quotient == 0)
5272         {
5273           /* No divide instruction either.  Use library for remainder.  */
5274           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5275                                          op0, op1, target,
5276                                          unsignedp, OPTAB_LIB_WIDEN);
5277           /* No remainder function.  Try a quotient-and-remainder
5278              function, keeping the remainder.  */
5279           if (!remainder)
5280             {
5281               remainder = gen_reg_rtx (compute_mode);
5282               if (!expand_twoval_binop_libfunc
5283                   (unsignedp ? udivmod_optab : sdivmod_optab,
5284                    op0, op1,
5285                    NULL_RTX, remainder,
5286                    unsignedp ? UMOD : MOD))
5287                 remainder = NULL_RTX;
5288             }
5289         }
5290       else
5291         {
5292           /* We divided.  Now finish doing X - Y * (X / Y).  */
5293           remainder = expand_mult (compute_mode, quotient, op1,
5294                                    NULL_RTX, unsignedp);
5295           remainder = expand_binop (compute_mode, sub_optab, op0,
5296                                     remainder, target, unsignedp,
5297                                     OPTAB_LIB_WIDEN);
5298         }
5299     }
5300
5301   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5302 }
5303 \f
5304 /* Return a tree node with data type TYPE, describing the value of X.
5305    Usually this is an VAR_DECL, if there is no obvious better choice.
5306    X may be an expression, however we only support those expressions
5307    generated by loop.c.  */
5308
5309 tree
5310 make_tree (tree type, rtx x)
5311 {
5312   tree t;
5313
5314   switch (GET_CODE (x))
5315     {
5316     case CONST_INT:
5317     case CONST_WIDE_INT:
5318       t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5319       return t;
5320
5321     case CONST_DOUBLE:
5322       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5323       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5324         t = wide_int_to_tree (type,
5325                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5326                                                     HOST_BITS_PER_WIDE_INT * 2));
5327       else
5328         t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5329
5330       return t;
5331
5332     case CONST_VECTOR:
5333       {
5334         unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5335         unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5336         tree itype = TREE_TYPE (type);
5337
5338         /* Build a tree with vector elements.  */
5339         tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5340         unsigned int count = elts.encoded_nelts ();
5341         for (unsigned int i = 0; i < count; ++i)
5342           {
5343             rtx elt = CONST_VECTOR_ELT (x, i);
5344             elts.quick_push (make_tree (itype, elt));
5345           }
5346
5347         return elts.build ();
5348       }
5349
5350     case PLUS:
5351       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5352                           make_tree (type, XEXP (x, 1)));
5353
5354     case MINUS:
5355       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5356                           make_tree (type, XEXP (x, 1)));
5357
5358     case NEG:
5359       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5360
5361     case MULT:
5362       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5363                           make_tree (type, XEXP (x, 1)));
5364
5365     case ASHIFT:
5366       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5367                           make_tree (type, XEXP (x, 1)));
5368
5369     case LSHIFTRT:
5370       t = unsigned_type_for (type);
5371       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5372                                          make_tree (t, XEXP (x, 0)),
5373                                          make_tree (type, XEXP (x, 1))));
5374
5375     case ASHIFTRT:
5376       t = signed_type_for (type);
5377       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5378                                          make_tree (t, XEXP (x, 0)),
5379                                          make_tree (type, XEXP (x, 1))));
5380
5381     case DIV:
5382       if (TREE_CODE (type) != REAL_TYPE)
5383         t = signed_type_for (type);
5384       else
5385         t = type;
5386
5387       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5388                                          make_tree (t, XEXP (x, 0)),
5389                                          make_tree (t, XEXP (x, 1))));
5390     case UDIV:
5391       t = unsigned_type_for (type);
5392       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5393                                          make_tree (t, XEXP (x, 0)),
5394                                          make_tree (t, XEXP (x, 1))));
5395
5396     case SIGN_EXTEND:
5397     case ZERO_EXTEND:
5398       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5399                                           GET_CODE (x) == ZERO_EXTEND);
5400       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5401
5402     case CONST:
5403       return make_tree (type, XEXP (x, 0));
5404
5405     case SYMBOL_REF:
5406       t = SYMBOL_REF_DECL (x);
5407       if (t)
5408         return fold_convert (type, build_fold_addr_expr (t));
5409       /* fall through.  */
5410
5411     default:
5412       if (CONST_POLY_INT_P (x))
5413         return wide_int_to_tree (t, const_poly_int_value (x));
5414
5415       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5416
5417       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5418          address mode to pointer mode.  */
5419       if (POINTER_TYPE_P (type))
5420         x = convert_memory_address_addr_space
5421           (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5422
5423       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5424          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5425       t->decl_with_rtl.rtl = x;
5426
5427       return t;
5428     }
5429 }
5430 \f
5431 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5432    and returning TARGET.
5433
5434    If TARGET is 0, a pseudo-register or constant is returned.  */
5435
5436 rtx
5437 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5438 {
5439   rtx tem = 0;
5440
5441   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5442     tem = simplify_binary_operation (AND, mode, op0, op1);
5443   if (tem == 0)
5444     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5445
5446   if (target == 0)
5447     target = tem;
5448   else if (tem != target)
5449     emit_move_insn (target, tem);
5450   return target;
5451 }
5452
5453 /* Helper function for emit_store_flag.  */
5454 rtx
5455 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5456              machine_mode mode, machine_mode compare_mode,
5457              int unsignedp, rtx x, rtx y, int normalizep,
5458              machine_mode target_mode)
5459 {
5460   class expand_operand ops[4];
5461   rtx op0, comparison, subtarget;
5462   rtx_insn *last;
5463   scalar_int_mode result_mode = targetm.cstore_mode (icode);
5464   scalar_int_mode int_target_mode;
5465
5466   last = get_last_insn ();
5467   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5468   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5469   if (!x || !y)
5470     {
5471       delete_insns_since (last);
5472       return NULL_RTX;
5473     }
5474
5475   if (target_mode == VOIDmode)
5476     int_target_mode = result_mode;
5477   else
5478     int_target_mode = as_a <scalar_int_mode> (target_mode);
5479   if (!target)
5480     target = gen_reg_rtx (int_target_mode);
5481
5482   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5483
5484   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5485   create_fixed_operand (&ops[1], comparison);
5486   create_fixed_operand (&ops[2], x);
5487   create_fixed_operand (&ops[3], y);
5488   if (!maybe_expand_insn (icode, 4, ops))
5489     {
5490       delete_insns_since (last);
5491       return NULL_RTX;
5492     }
5493   subtarget = ops[0].value;
5494
5495   /* If we are converting to a wider mode, first convert to
5496      INT_TARGET_MODE, then normalize.  This produces better combining
5497      opportunities on machines that have a SIGN_EXTRACT when we are
5498      testing a single bit.  This mostly benefits the 68k.
5499
5500      If STORE_FLAG_VALUE does not have the sign bit set when
5501      interpreted in MODE, we can do this conversion as unsigned, which
5502      is usually more efficient.  */
5503   if (GET_MODE_PRECISION (int_target_mode) > GET_MODE_PRECISION (result_mode))
5504     {
5505       gcc_assert (GET_MODE_PRECISION (result_mode) != 1
5506                   || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
5507
5508       bool unsignedp = (STORE_FLAG_VALUE >= 0);
5509       convert_move (target, subtarget, unsignedp);
5510
5511       op0 = target;
5512       result_mode = int_target_mode;
5513     }
5514   else
5515     op0 = subtarget;
5516
5517   /* If we want to keep subexpressions around, don't reuse our last
5518      target.  */
5519   if (optimize)
5520     subtarget = 0;
5521
5522   /* Now normalize to the proper value in MODE.  Sometimes we don't
5523      have to do anything.  */
5524   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5525     ;
5526   /* STORE_FLAG_VALUE might be the most negative number, so write
5527      the comparison this way to avoid a compiler-time warning.  */
5528   else if (- normalizep == STORE_FLAG_VALUE)
5529     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5530
5531   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5532      it hard to use a value of just the sign bit due to ANSI integer
5533      constant typing rules.  */
5534   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5535     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5536                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5537                         normalizep == 1);
5538   else
5539     {
5540       gcc_assert (STORE_FLAG_VALUE & 1);
5541
5542       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5543       if (normalizep == -1)
5544         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5545     }
5546
5547   /* If we were converting to a smaller mode, do the conversion now.  */
5548   if (int_target_mode != result_mode)
5549     {
5550       convert_move (target, op0, 0);
5551       return target;
5552     }
5553   else
5554     return op0;
5555 }
5556
5557
5558 /* A subroutine of emit_store_flag only including "tricks" that do not
5559    need a recursive call.  These are kept separate to avoid infinite
5560    loops.  */
5561
5562 static rtx
5563 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5564                    machine_mode mode, int unsignedp, int normalizep,
5565                    machine_mode target_mode)
5566 {
5567   rtx subtarget;
5568   enum insn_code icode;
5569   machine_mode compare_mode;
5570   enum mode_class mclass;
5571   enum rtx_code scode;
5572
5573   if (unsignedp)
5574     code = unsigned_condition (code);
5575   scode = swap_condition (code);
5576
5577   /* If one operand is constant, make it the second one.  Only do this
5578      if the other operand is not constant as well.  */
5579
5580   if (swap_commutative_operands_p (op0, op1))
5581     {
5582       std::swap (op0, op1);
5583       code = swap_condition (code);
5584     }
5585
5586   if (mode == VOIDmode)
5587     mode = GET_MODE (op0);
5588
5589   if (CONST_SCALAR_INT_P (op1))
5590     canonicalize_comparison (mode, &code, &op1);
5591
5592   /* For some comparisons with 1 and -1, we can convert this to
5593      comparisons with zero.  This will often produce more opportunities for
5594      store-flag insns.  */
5595
5596   switch (code)
5597     {
5598     case LT:
5599       if (op1 == const1_rtx)
5600         op1 = const0_rtx, code = LE;
5601       break;
5602     case LE:
5603       if (op1 == constm1_rtx)
5604         op1 = const0_rtx, code = LT;
5605       break;
5606     case GE:
5607       if (op1 == const1_rtx)
5608         op1 = const0_rtx, code = GT;
5609       break;
5610     case GT:
5611       if (op1 == constm1_rtx)
5612         op1 = const0_rtx, code = GE;
5613       break;
5614     case GEU:
5615       if (op1 == const1_rtx)
5616         op1 = const0_rtx, code = NE;
5617       break;
5618     case LTU:
5619       if (op1 == const1_rtx)
5620         op1 = const0_rtx, code = EQ;
5621       break;
5622     default:
5623       break;
5624     }
5625
5626   /* If we are comparing a double-word integer with zero or -1, we can
5627      convert the comparison into one involving a single word.  */
5628   scalar_int_mode int_mode;
5629   if (is_int_mode (mode, &int_mode)
5630       && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5631       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5632     {
5633       rtx tem;
5634       if ((code == EQ || code == NE)
5635           && (op1 == const0_rtx || op1 == constm1_rtx))
5636         {
5637           rtx op00, op01;
5638
5639           /* Do a logical OR or AND of the two words and compare the
5640              result.  */
5641           op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5642           op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5643           tem = expand_binop (word_mode,
5644                               op1 == const0_rtx ? ior_optab : and_optab,
5645                               op00, op01, NULL_RTX, unsignedp,
5646                               OPTAB_DIRECT);
5647
5648           if (tem != 0)
5649             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5650                                    unsignedp, normalizep);
5651         }
5652       else if ((code == LT || code == GE) && op1 == const0_rtx)
5653         {
5654           rtx op0h;
5655
5656           /* If testing the sign bit, can just test on high word.  */
5657           op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5658                                       subreg_highpart_offset (word_mode,
5659                                                               int_mode));
5660           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5661                                  unsignedp, normalizep);
5662         }
5663       else
5664         tem = NULL_RTX;
5665
5666       if (tem)
5667         {
5668           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5669             return tem;
5670           if (!target)
5671             target = gen_reg_rtx (target_mode);
5672
5673           convert_move (target, tem,
5674                         !val_signbit_known_set_p (word_mode,
5675                                                   (normalizep ? normalizep
5676                                                    : STORE_FLAG_VALUE)));
5677           return target;
5678         }
5679     }
5680
5681   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5682      complement of A (for GE) and shifting the sign bit to the low bit.  */
5683   if (op1 == const0_rtx && (code == LT || code == GE)
5684       && is_int_mode (mode, &int_mode)
5685       && (normalizep || STORE_FLAG_VALUE == 1
5686           || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5687     {
5688       scalar_int_mode int_target_mode;
5689       subtarget = target;
5690
5691       if (!target)
5692         int_target_mode = int_mode;
5693       else
5694         {
5695           /* If the result is to be wider than OP0, it is best to convert it
5696              first.  If it is to be narrower, it is *incorrect* to convert it
5697              first.  */
5698           int_target_mode = as_a <scalar_int_mode> (target_mode);
5699           if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5700             {
5701               op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5702               int_mode = int_target_mode;
5703             }
5704         }
5705
5706       if (int_target_mode != int_mode)
5707         subtarget = 0;
5708
5709       if (code == GE)
5710         op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5711                            ((STORE_FLAG_VALUE == 1 || normalizep)
5712                             ? 0 : subtarget), 0);
5713
5714       if (STORE_FLAG_VALUE == 1 || normalizep)
5715         /* If we are supposed to produce a 0/1 value, we want to do
5716            a logical shift from the sign bit to the low-order bit; for
5717            a -1/0 value, we do an arithmetic shift.  */
5718         op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5719                             GET_MODE_BITSIZE (int_mode) - 1,
5720                             subtarget, normalizep != -1);
5721
5722       if (int_mode != int_target_mode)
5723         op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5724
5725       return op0;
5726     }
5727
5728   mclass = GET_MODE_CLASS (mode);
5729   FOR_EACH_MODE_FROM (compare_mode, mode)
5730     {
5731      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5732      icode = optab_handler (cstore_optab, optab_mode);
5733      if (icode != CODE_FOR_nothing)
5734         {
5735           do_pending_stack_adjust ();
5736           rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5737                                  unsignedp, op0, op1, normalizep, target_mode);
5738           if (tem)
5739             return tem;
5740
5741           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5742             {
5743               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5744                                  unsignedp, op1, op0, normalizep, target_mode);
5745               if (tem)
5746                 return tem;
5747             }
5748           break;
5749         }
5750     }
5751
5752   return 0;
5753 }
5754
5755 /* Subroutine of emit_store_flag that handles cases in which the operands
5756    are scalar integers.  SUBTARGET is the target to use for temporary
5757    operations and TRUEVAL is the value to store when the condition is
5758    true.  All other arguments are as for emit_store_flag.  */
5759
5760 rtx
5761 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5762                      rtx op1, scalar_int_mode mode, int unsignedp,
5763                      int normalizep, rtx trueval)
5764 {
5765   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5766   rtx_insn *last = get_last_insn ();
5767
5768   /* If this is an equality comparison of integers, we can try to exclusive-or
5769      (or subtract) the two operands and use a recursive call to try the
5770      comparison with zero.  Don't do any of these cases if branches are
5771      very cheap.  */
5772
5773   if ((code == EQ || code == NE) && op1 != const0_rtx)
5774     {
5775       rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5776                               OPTAB_WIDEN);
5777
5778       if (tem == 0)
5779         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5780                             OPTAB_WIDEN);
5781       if (tem != 0)
5782         tem = emit_store_flag (target, code, tem, const0_rtx,
5783                                mode, unsignedp, normalizep);
5784       if (tem != 0)
5785         return tem;
5786
5787       delete_insns_since (last);
5788     }
5789
5790   /* For integer comparisons, try the reverse comparison.  However, for
5791      small X and if we'd have anyway to extend, implementing "X != 0"
5792      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5793   rtx_code rcode = reverse_condition (code);
5794   if (can_compare_p (rcode, mode, ccp_store_flag)
5795       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5796             && code == NE
5797             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5798             && op1 == const0_rtx))
5799     {
5800       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5801                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5802
5803       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5804       if (want_add
5805           && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5806                        optimize_insn_for_speed_p ()) == 0)
5807         {
5808           rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5809                                        STORE_FLAG_VALUE, target_mode);
5810           if (tem != 0)
5811             tem = expand_binop (target_mode, add_optab, tem,
5812                                 gen_int_mode (normalizep, target_mode),
5813                                 target, 0, OPTAB_WIDEN);
5814           if (tem != 0)
5815             return tem;
5816         }
5817       else if (!want_add
5818                && rtx_cost (trueval, mode, XOR, 1,
5819                             optimize_insn_for_speed_p ()) == 0)
5820         {
5821           rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5822                                        normalizep, target_mode);
5823           if (tem != 0)
5824             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5825                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5826           if (tem != 0)
5827             return tem;
5828         }
5829
5830       delete_insns_since (last);
5831     }
5832
5833   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5834      the constant zero.  Reject all other comparisons at this point.  Only
5835      do LE and GT if branches are expensive since they are expensive on
5836      2-operand machines.  */
5837
5838   if (op1 != const0_rtx
5839       || (code != EQ && code != NE
5840           && (BRANCH_COST (optimize_insn_for_speed_p (),
5841                            false) <= 1 || (code != LE && code != GT))))
5842     return 0;
5843
5844   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5845      do the necessary operation below.  */
5846
5847   rtx tem = 0;
5848
5849   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5850      the sign bit set.  */
5851
5852   if (code == LE)
5853     {
5854       /* This is destructive, so SUBTARGET can't be OP0.  */
5855       if (rtx_equal_p (subtarget, op0))
5856         subtarget = 0;
5857
5858       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5859                           OPTAB_WIDEN);
5860       if (tem)
5861         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5862                             OPTAB_WIDEN);
5863     }
5864
5865   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5866      number of bits in the mode of OP0, minus one.  */
5867
5868   if (code == GT)
5869     {
5870       if (rtx_equal_p (subtarget, op0))
5871         subtarget = 0;
5872
5873       tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5874                                 GET_MODE_BITSIZE (mode) - 1,
5875                                 subtarget, 0);
5876       if (tem)
5877         tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5878                             OPTAB_WIDEN);
5879     }
5880
5881   if (code == EQ || code == NE)
5882     {
5883       /* For EQ or NE, one way to do the comparison is to apply an operation
5884          that converts the operand into a positive number if it is nonzero
5885          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5886          for NE we negate.  This puts the result in the sign bit.  Then we
5887          normalize with a shift, if needed.
5888
5889          Two operations that can do the above actions are ABS and FFS, so try
5890          them.  If that doesn't work, and MODE is smaller than a full word,
5891          we can use zero-extension to the wider mode (an unsigned conversion)
5892          as the operation.  */
5893
5894       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5895          that is compensated by the subsequent overflow when subtracting
5896          one / negating.  */
5897
5898       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5899         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5900       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5901         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5902       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5903         {
5904           tem = convert_modes (word_mode, mode, op0, 1);
5905           mode = word_mode;
5906         }
5907
5908       if (tem != 0)
5909         {
5910           if (code == EQ)
5911             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5912                                 0, OPTAB_WIDEN);
5913           else
5914             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5915         }
5916
5917       /* If we couldn't do it that way, for NE we can "or" the two's complement
5918          of the value with itself.  For EQ, we take the one's complement of
5919          that "or", which is an extra insn, so we only handle EQ if branches
5920          are expensive.  */
5921
5922       if (tem == 0
5923           && (code == NE
5924               || BRANCH_COST (optimize_insn_for_speed_p (),
5925                               false) > 1))
5926         {
5927           if (rtx_equal_p (subtarget, op0))
5928             subtarget = 0;
5929
5930           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5931           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5932                               OPTAB_WIDEN);
5933
5934           if (tem && code == EQ)
5935             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5936         }
5937     }
5938
5939   if (tem && normalizep)
5940     tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5941                               GET_MODE_BITSIZE (mode) - 1,
5942                               subtarget, normalizep == 1);
5943
5944   if (tem)
5945     {
5946       if (!target)
5947         ;
5948       else if (GET_MODE (tem) != target_mode)
5949         {
5950           convert_move (target, tem, 0);
5951           tem = target;
5952         }
5953       else if (!subtarget)
5954         {
5955           emit_move_insn (target, tem);
5956           tem = target;
5957         }
5958     }
5959   else
5960     delete_insns_since (last);
5961
5962   return tem;
5963 }
5964
5965 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5966    and storing in TARGET.  Normally return TARGET.
5967    Return 0 if that cannot be done.
5968
5969    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5970    it is VOIDmode, they cannot both be CONST_INT.
5971
5972    UNSIGNEDP is for the case where we have to widen the operands
5973    to perform the operation.  It says to use zero-extension.
5974
5975    NORMALIZEP is 1 if we should convert the result to be either zero
5976    or one.  Normalize is -1 if we should convert the result to be
5977    either zero or -1.  If NORMALIZEP is zero, the result will be left
5978    "raw" out of the scc insn.  */
5979
5980 rtx
5981 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5982                  machine_mode mode, int unsignedp, int normalizep)
5983 {
5984   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5985   enum rtx_code rcode;
5986   rtx subtarget;
5987   rtx tem, trueval;
5988   rtx_insn *last;
5989
5990   /* If we compare constants, we shouldn't use a store-flag operation,
5991      but a constant load.  We can get there via the vanilla route that
5992      usually generates a compare-branch sequence, but will in this case
5993      fold the comparison to a constant, and thus elide the branch.  */
5994   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5995     return NULL_RTX;
5996
5997   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5998                            target_mode);
5999   if (tem)
6000     return tem;
6001
6002   /* If we reached here, we can't do this with a scc insn, however there
6003      are some comparisons that can be done in other ways.  Don't do any
6004      of these cases if branches are very cheap.  */
6005   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
6006     return 0;
6007
6008   /* See what we need to return.  We can only return a 1, -1, or the
6009      sign bit.  */
6010
6011   if (normalizep == 0)
6012     {
6013       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
6014         normalizep = STORE_FLAG_VALUE;
6015
6016       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
6017         ;
6018       else
6019         return 0;
6020     }
6021
6022   last = get_last_insn ();
6023
6024   /* If optimizing, use different pseudo registers for each insn, instead
6025      of reusing the same pseudo.  This leads to better CSE, but slows
6026      down the compiler, since there are more pseudos.  */
6027   subtarget = (!optimize
6028                && (target_mode == mode)) ? target : NULL_RTX;
6029   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6030
6031   /* For floating-point comparisons, try the reverse comparison or try
6032      changing the "orderedness" of the comparison.  */
6033   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6034     {
6035       enum rtx_code first_code;
6036       bool and_them;
6037
6038       rcode = reverse_condition_maybe_unordered (code);
6039       if (can_compare_p (rcode, mode, ccp_store_flag)
6040           && (code == ORDERED || code == UNORDERED
6041               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6042               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6043         {
6044           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6045                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6046
6047           /* For the reverse comparison, use either an addition or a XOR.  */
6048           if (want_add
6049               && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6050                            optimize_insn_for_speed_p ()) == 0)
6051             {
6052               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6053                                        STORE_FLAG_VALUE, target_mode);
6054               if (tem)
6055                 return expand_binop (target_mode, add_optab, tem,
6056                                      gen_int_mode (normalizep, target_mode),
6057                                      target, 0, OPTAB_WIDEN);
6058             }
6059           else if (!want_add
6060                    && rtx_cost (trueval, mode, XOR, 1,
6061                                 optimize_insn_for_speed_p ()) == 0)
6062             {
6063               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6064                                        normalizep, target_mode);
6065               if (tem)
6066                 return expand_binop (target_mode, xor_optab, tem, trueval,
6067                                      target, INTVAL (trueval) >= 0,
6068                                      OPTAB_WIDEN);
6069             }
6070         }
6071
6072       delete_insns_since (last);
6073
6074       /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
6075       if (code == ORDERED || code == UNORDERED)
6076         return 0;
6077
6078       and_them = split_comparison (code, mode, &first_code, &code);
6079
6080       /* If there are no NaNs, the first comparison should always fall through.
6081          Effectively change the comparison to the other one.  */
6082       if (!HONOR_NANS (mode))
6083         {
6084           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6085           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6086                                     target_mode);
6087         }
6088
6089       if (!HAVE_conditional_move)
6090         return 0;
6091
6092       /* Do not turn a trapping comparison into a non-trapping one.  */
6093       if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6094           && flag_trapping_math)
6095         return 0;
6096
6097       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6098          conditional move.  */
6099       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6100                                normalizep, target_mode);
6101       if (tem == 0)
6102         return 0;
6103
6104       if (and_them)
6105         tem = emit_conditional_move (target, code, op0, op1, mode,
6106                                      tem, const0_rtx, GET_MODE (tem), 0);
6107       else
6108         tem = emit_conditional_move (target, code, op0, op1, mode,
6109                                      trueval, tem, GET_MODE (tem), 0);
6110
6111       if (tem == 0)
6112         delete_insns_since (last);
6113       return tem;
6114     }
6115
6116   /* The remaining tricks only apply to integer comparisons.  */
6117
6118   scalar_int_mode int_mode;
6119   if (is_int_mode (mode, &int_mode))
6120     return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6121                                 unsignedp, normalizep, trueval);
6122
6123   return 0;
6124 }
6125
6126 /* Like emit_store_flag, but always succeeds.  */
6127
6128 rtx
6129 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6130                        machine_mode mode, int unsignedp, int normalizep)
6131 {
6132   rtx tem;
6133   rtx_code_label *label;
6134   rtx trueval, falseval;
6135
6136   /* First see if emit_store_flag can do the job.  */
6137   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6138   if (tem != 0)
6139     return tem;
6140
6141   /* If one operand is constant, make it the second one.  Only do this
6142      if the other operand is not constant as well.  */
6143   if (swap_commutative_operands_p (op0, op1))
6144     {
6145       std::swap (op0, op1);
6146       code = swap_condition (code);
6147     }
6148
6149   if (mode == VOIDmode)
6150     mode = GET_MODE (op0);
6151
6152   if (!target)
6153     target = gen_reg_rtx (word_mode);
6154
6155   /* If this failed, we have to do this with set/compare/jump/set code.
6156      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
6157   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6158   if (code == NE
6159       && GET_MODE_CLASS (mode) == MODE_INT
6160       && REG_P (target)
6161       && op0 == target
6162       && op1 == const0_rtx)
6163     {
6164       label = gen_label_rtx ();
6165       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6166                                NULL_RTX, NULL, label,
6167                                profile_probability::uninitialized ());
6168       emit_move_insn (target, trueval);
6169       emit_label (label);
6170       return target;
6171     }
6172
6173   if (!REG_P (target)
6174       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6175     target = gen_reg_rtx (GET_MODE (target));
6176
6177   /* Jump in the right direction if the target cannot implement CODE
6178      but can jump on its reverse condition.  */
6179   falseval = const0_rtx;
6180   if (! can_compare_p (code, mode, ccp_jump)
6181       && (! FLOAT_MODE_P (mode)
6182           || code == ORDERED || code == UNORDERED
6183           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6184           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6185     {
6186       enum rtx_code rcode;
6187       if (FLOAT_MODE_P (mode))
6188         rcode = reverse_condition_maybe_unordered (code);
6189       else
6190         rcode = reverse_condition (code);
6191
6192       /* Canonicalize to UNORDERED for the libcall.  */
6193       if (can_compare_p (rcode, mode, ccp_jump)
6194           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6195         {
6196           falseval = trueval;
6197           trueval = const0_rtx;
6198           code = rcode;
6199         }
6200     }
6201
6202   emit_move_insn (target, trueval);
6203   label = gen_label_rtx ();
6204   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6205                            label, profile_probability::uninitialized ());
6206
6207   emit_move_insn (target, falseval);
6208   emit_label (label);
6209
6210   return target;
6211 }
6212
6213 /* Helper function for canonicalize_cmp_for_target.  Swap between inclusive
6214    and exclusive ranges in order to create an equivalent comparison.  See
6215    canonicalize_cmp_for_target for the possible cases.  */
6216
6217 static enum rtx_code
6218 equivalent_cmp_code (enum rtx_code code)
6219 {
6220   switch (code)
6221     {
6222     case GT:
6223       return GE;
6224     case GE:
6225       return GT;
6226     case LT:
6227       return LE;
6228     case LE:
6229       return LT;
6230     case GTU:
6231       return GEU;
6232     case GEU:
6233       return GTU;
6234     case LTU:
6235       return LEU;
6236     case LEU:
6237       return LTU;
6238
6239     default:
6240       return code;
6241     }
6242 }
6243
6244 /* Choose the more appropiate immediate in scalar integer comparisons.  The
6245    purpose of this is to end up with an immediate which can be loaded into a
6246    register in fewer moves, if possible.
6247
6248    For each integer comparison there exists an equivalent choice:
6249      i)   a >  b or a >= b + 1
6250      ii)  a <= b or a <  b + 1
6251      iii) a >= b or a >  b - 1
6252      iv)  a <  b or a <= b - 1
6253
6254    MODE is the mode of the first operand.
6255    CODE points to the comparison code.
6256    IMM points to the rtx containing the immediate.  *IMM must satisfy
6257    CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
6258    on exit.  */
6259
6260 void
6261 canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
6262 {
6263   if (!SCALAR_INT_MODE_P (mode))
6264     return;
6265
6266   int to_add = 0;
6267   enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;
6268
6269   /* Extract the immediate value from the rtx.  */
6270   wide_int imm_val = rtx_mode_t (*imm, mode);
6271
6272   if (*code == GT || *code == GTU || *code == LE || *code == LEU)
6273     to_add = 1;
6274   else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
6275     to_add = -1;
6276   else
6277     return;
6278
6279   /* Check for overflow/underflow in the case of signed values and
6280      wrapping around in the case of unsigned values.  If any occur
6281      cancel the optimization.  */
6282   wi::overflow_type overflow = wi::OVF_NONE;
6283   wide_int imm_modif;
6284
6285   if (to_add == 1)
6286     imm_modif = wi::add (imm_val, 1, sgn, &overflow);
6287   else
6288     imm_modif = wi::sub (imm_val, 1, sgn, &overflow);
6289
6290   if (overflow)
6291     return;
6292
6293   /* The following creates a pseudo; if we cannot do that, bail out.  */
6294   if (!can_create_pseudo_p ())
6295     return;
6296
6297   rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
6298   rtx new_imm = immed_wide_int_const (imm_modif, mode);
6299
6300   rtx_insn *old_rtx = gen_move_insn (reg, *imm);
6301   rtx_insn *new_rtx = gen_move_insn (reg, new_imm);
6302
6303   /* Update the immediate and the code.  */
6304   if (insn_cost (old_rtx, true) > insn_cost (new_rtx, true))
6305     {
6306       *code = equivalent_cmp_code (*code);
6307       *imm = new_imm;
6308     }
6309 }
6310
6311
6312 \f
6313 /* Perform possibly multi-word comparison and conditional jump to LABEL
6314    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
6315    now a thin wrapper around do_compare_rtx_and_jump.  */
6316
6317 static void
6318 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6319                  rtx_code_label *label)
6320 {
6321   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6322   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6323                            NULL, label, profile_probability::uninitialized ());
6324 }