/* gcc/expmed.c */
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2019 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "predict.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "optabs.h"
34 #include "regs.h"
35 #include "emit-rtl.h"
36 #include "diagnostic-core.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "dojump.h"
40 #include "explow.h"
41 #include "expr.h"
42 #include "langhooks.h"
43 #include "tree-vector-builder.h"
45 struct target_expmed default_target_expmed;
46 #if SWITCHABLE_TARGET
47 struct target_expmed *this_target_expmed = &default_target_expmed;
48 #endif
50 static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
51 unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 poly_uint64, poly_uint64,
54 machine_mode, rtx, bool, bool);
55 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
56 unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 poly_uint64, poly_uint64,
59 rtx, scalar_int_mode, bool);
60 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
61 unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT,
63 rtx, scalar_int_mode, bool);
64 static void store_split_bit_field (rtx, opt_scalar_int_mode,
65 unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT,
67 poly_uint64, poly_uint64,
68 rtx, scalar_int_mode, bool);
69 static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
70 unsigned HOST_WIDE_INT,
71 unsigned HOST_WIDE_INT, int, rtx,
72 machine_mode, machine_mode, bool, bool);
73 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
74 unsigned HOST_WIDE_INT,
75 unsigned HOST_WIDE_INT, rtx, int, bool);
76 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
77 unsigned HOST_WIDE_INT,
78 unsigned HOST_WIDE_INT, rtx, int, bool);
79 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
80 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
81 unsigned HOST_WIDE_INT,
82 unsigned HOST_WIDE_INT, int, bool);
83 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
84 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
85 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
87 /* Return a constant integer mask value of mode MODE with BITSIZE ones
88 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
89 The mask is truncated if necessary to the width of mode MODE. The
90 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
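/* Illustrative example (editor's sketch, assuming a 32-bit SImode):
   with BITPOS == 8 and BITSIZE == 4 the mask has bits 8..11 set, so
   mask_rtx (SImode, 8, 4, false) yields a constant with value 0xf00,
   and passing COMPLEMENT == true yields its SImode complement.  */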
92 static inline rtx
93 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
95 return immed_wide_int_const
96 (wi::shifted_mask (bitpos, bitsize, complement,
97 GET_MODE_PRECISION (mode)), mode);
100 /* Test whether a value is zero or a power of two. */
101 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
102 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
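/* Worked example (editor's note): for x == 8 (binary 1000), x - 1 == 0111
   and x & (x - 1) == 0, so the test is true; for x == 6 (binary 0110),
   x & (x - 1) == 0100, so it is false.  x == 0 also passes, hence the
   _OR_ZERO in the macro name.  */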
104 struct init_expmed_rtl
106 rtx reg;
107 rtx plus;
108 rtx neg;
109 rtx mult;
110 rtx sdiv;
111 rtx udiv;
112 rtx sdiv_32;
113 rtx smod_32;
114 rtx wide_mult;
115 rtx wide_lshr;
116 rtx wide_trunc;
117 rtx shift;
118 rtx shift_mult;
119 rtx shift_add;
120 rtx shift_sub0;
121 rtx shift_sub1;
122 rtx zext;
123 rtx trunc;
125 rtx pow2[MAX_BITS_PER_WORD];
126 rtx cint[MAX_BITS_PER_WORD];
129 static void
130 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
131 scalar_int_mode from_mode, bool speed)
133 int to_size, from_size;
134 rtx which;
136 to_size = GET_MODE_PRECISION (to_mode);
137 from_size = GET_MODE_PRECISION (from_mode);
139 /* Most partial integer modes have a precision less than that of the
140 "full" integer mode required for their storage.  If one does not,
141 reduce its bit size by one for the comparison done here. */
143 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
144 && pow2p_hwi (to_size))
145 to_size --;
146 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
147 && pow2p_hwi (from_size))
148 from_size --;
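/* Editor's illustration (hypothetical target): a partial-integer mode that
   nevertheless reports a power-of-two precision, say 32 bits, is treated
   here as having 31 bits, so converting it to the corresponding full
   32-bit mode is still costed as an extension rather than a truncation.  */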
150 /* Assume cost of zero-extend and sign-extend is the same. */
151 which = (to_size < from_size ? all->trunc : all->zext);
153 PUT_MODE (all->reg, from_mode);
154 set_convert_cost (to_mode, from_mode, speed,
155 set_src_cost (which, to_mode, speed));
158 static void
159 init_expmed_one_mode (struct init_expmed_rtl *all,
160 machine_mode mode, int speed)
162 int m, n, mode_bitsize;
163 machine_mode mode_from;
165 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
167 PUT_MODE (all->reg, mode);
168 PUT_MODE (all->plus, mode);
169 PUT_MODE (all->neg, mode);
170 PUT_MODE (all->mult, mode);
171 PUT_MODE (all->sdiv, mode);
172 PUT_MODE (all->udiv, mode);
173 PUT_MODE (all->sdiv_32, mode);
174 PUT_MODE (all->smod_32, mode);
175 PUT_MODE (all->wide_trunc, mode);
176 PUT_MODE (all->shift, mode);
177 PUT_MODE (all->shift_mult, mode);
178 PUT_MODE (all->shift_add, mode);
179 PUT_MODE (all->shift_sub0, mode);
180 PUT_MODE (all->shift_sub1, mode);
181 PUT_MODE (all->zext, mode);
182 PUT_MODE (all->trunc, mode);
184 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
185 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
186 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
187 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
188 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
190 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
191 <= 2 * add_cost (speed, mode)));
192 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
193 <= 4 * add_cost (speed, mode)));
195 set_shift_cost (speed, mode, 0, 0);
197 int cost = add_cost (speed, mode);
198 set_shiftadd_cost (speed, mode, 0, cost);
199 set_shiftsub0_cost (speed, mode, 0, cost);
200 set_shiftsub1_cost (speed, mode, 0, cost);
203 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
204 for (m = 1; m < n; m++)
206 XEXP (all->shift, 1) = all->cint[m];
207 XEXP (all->shift_mult, 1) = all->pow2[m];
209 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
210 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
211 speed));
212 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
213 speed));
214 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
215 speed));
218 scalar_int_mode int_mode_to;
219 if (is_a <scalar_int_mode> (mode, &int_mode_to))
221 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
222 mode_from = (machine_mode)(mode_from + 1))
223 init_expmed_one_conv (all, int_mode_to,
224 as_a <scalar_int_mode> (mode_from), speed);
226 scalar_int_mode wider_mode;
227 if (GET_MODE_CLASS (int_mode_to) == MODE_INT
228 && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
230 PUT_MODE (all->zext, wider_mode);
231 PUT_MODE (all->wide_mult, wider_mode);
232 PUT_MODE (all->wide_lshr, wider_mode);
233 XEXP (all->wide_lshr, 1)
234 = gen_int_shift_amount (wider_mode, mode_bitsize);
236 set_mul_widen_cost (speed, wider_mode,
237 set_src_cost (all->wide_mult, wider_mode, speed));
238 set_mul_highpart_cost (speed, int_mode_to,
239 set_src_cost (all->wide_trunc,
240 int_mode_to, speed));
245 void
246 init_expmed (void)
248 struct init_expmed_rtl all;
249 machine_mode mode = QImode;
250 int m, speed;
252 memset (&all, 0, sizeof all);
253 for (m = 1; m < MAX_BITS_PER_WORD; m++)
255 all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
256 all.cint[m] = GEN_INT (m);
259 /* Avoid using hard regs in ways which may be unsupported. */
260 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
261 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
262 all.neg = gen_rtx_NEG (mode, all.reg);
263 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
264 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
265 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
266 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
267 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
268 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
269 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
270 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
271 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
272 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
273 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
274 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
275 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
276 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
277 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
279 for (speed = 0; speed < 2; speed++)
281 crtl->maybe_hot_insn_p = speed;
282 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
284 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
285 mode = (machine_mode)(mode + 1))
286 init_expmed_one_mode (&all, mode, speed);
288 if (MIN_MODE_PARTIAL_INT != VOIDmode)
289 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
290 mode = (machine_mode)(mode + 1))
291 init_expmed_one_mode (&all, mode, speed);
293 if (MIN_MODE_VECTOR_INT != VOIDmode)
294 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
295 mode = (machine_mode)(mode + 1))
296 init_expmed_one_mode (&all, mode, speed);
299 if (alg_hash_used_p ())
301 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
302 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
304 else
305 set_alg_hash_used_p (true);
306 default_rtl_profile ();
308 ggc_free (all.trunc);
309 ggc_free (all.shift_sub1);
310 ggc_free (all.shift_sub0);
311 ggc_free (all.shift_add);
312 ggc_free (all.shift_mult);
313 ggc_free (all.shift);
314 ggc_free (all.wide_trunc);
315 ggc_free (all.wide_lshr);
316 ggc_free (all.wide_mult);
317 ggc_free (all.zext);
318 ggc_free (all.smod_32);
319 ggc_free (all.sdiv_32);
320 ggc_free (all.udiv);
321 ggc_free (all.sdiv);
322 ggc_free (all.mult);
323 ggc_free (all.neg);
324 ggc_free (all.plus);
325 ggc_free (all.reg);
328 /* Return an rtx representing minus the value of X.
329 MODE is the intended mode of the result,
330 useful if X is a CONST_INT. */
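/* Illustrative example (editor's note): negate_rtx (SImode, const1_rtx)
   folds directly via simplify_unary_operation to (const_int -1), while a
   register operand falls through to expand_unop and expands a negation
   through neg_optab.  */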
333 negate_rtx (machine_mode mode, rtx x)
335 rtx result = simplify_unary_operation (NEG, mode, x, mode);
337 if (result == 0)
338 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
340 return result;
343 /* Whether reverse storage order is supported on the target. */
344 static int reverse_storage_order_supported = -1;
346 /* Check whether reverse storage order is supported on the target. */
348 static void
349 check_reverse_storage_order_support (void)
351 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
353 reverse_storage_order_supported = 0;
354 sorry ("reverse scalar storage order");
356 else
357 reverse_storage_order_supported = 1;
360 /* Whether reverse FP storage order is supported on the target. */
361 static int reverse_float_storage_order_supported = -1;
363 /* Check whether reverse FP storage order is supported on the target. */
365 static void
366 check_reverse_float_storage_order_support (void)
368 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
370 reverse_float_storage_order_supported = 0;
371 sorry ("reverse floating-point scalar storage order");
373 else
374 reverse_float_storage_order_supported = 1;
377 /* Return an rtx representing the value of X with reverse storage order.
378 MODE is the intended mode of the result,
379 useful if X is a CONST_INT. */
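/* Illustrative example (editor's note): for a 32-bit scalar integer the
   reversal amounts to a byte swap, so a value of 0x12345678 becomes
   0x78563412; complex modes are handled by recursing on the real and
   imaginary parts separately.  */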
382 flip_storage_order (machine_mode mode, rtx x)
384 scalar_int_mode int_mode;
385 rtx result;
387 if (mode == QImode)
388 return x;
390 if (COMPLEX_MODE_P (mode))
392 rtx real = read_complex_part (x, false);
393 rtx imag = read_complex_part (x, true);
395 real = flip_storage_order (GET_MODE_INNER (mode), real);
396 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
398 return gen_rtx_CONCAT (mode, real, imag);
401 if (__builtin_expect (reverse_storage_order_supported < 0, 0))
402 check_reverse_storage_order_support ();
404 if (!is_a <scalar_int_mode> (mode, &int_mode))
406 if (FLOAT_MODE_P (mode)
407 && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
408 check_reverse_float_storage_order_support ();
410 if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode))
412 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
413 return x;
415 x = gen_lowpart (int_mode, x);
418 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
419 if (result == 0)
420 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
422 if (int_mode != mode)
423 result = gen_lowpart (mode, result);
425 return result;
428 /* If MODE is set, adjust bitfield memory MEM so that it points to the
429 first unit of mode MODE that contains a bitfield of size BITSIZE at
430 bit position BITNUM. If MODE is not set, return a BLKmode reference
431 to every byte in the bitfield. Set *NEW_BITNUM to the bit position
432 of the field within the new memory. */
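/* Worked example (editor's note, assuming 8-bit units and a 32-bit MODE):
   for BITNUM == 37 the field starts 37 % 32 == 5 bits into the second
   32-bit unit, so the MEM is advanced by (37 - 5) / 8 == 4 bytes and
   *NEW_BITNUM becomes 5.  */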
434 static rtx
435 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
436 unsigned HOST_WIDE_INT bitsize,
437 unsigned HOST_WIDE_INT bitnum,
438 unsigned HOST_WIDE_INT *new_bitnum)
440 scalar_int_mode imode;
441 if (mode.exists (&imode))
443 unsigned int unit = GET_MODE_BITSIZE (imode);
444 *new_bitnum = bitnum % unit;
445 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
446 return adjust_bitfield_address (mem, imode, offset);
448 else
450 *new_bitnum = bitnum % BITS_PER_UNIT;
451 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
452 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
453 / BITS_PER_UNIT);
454 return adjust_bitfield_address_size (mem, BLKmode, offset, size);
458 /* The caller wants to perform insertion or extraction PATTERN on a
459 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
460 BITREGION_START and BITREGION_END are as for store_bit_field
461 and FIELDMODE is the natural mode of the field.
463 Search for a mode that is compatible with the memory access
464 restrictions and (where applicable) with a register insertion or
465 extraction. Return the new memory on success, storing the adjusted
466 bit position in *NEW_BITNUM. Return null otherwise. */
468 static rtx
469 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
470 rtx op0, HOST_WIDE_INT bitsize,
471 HOST_WIDE_INT bitnum,
472 poly_uint64 bitregion_start,
473 poly_uint64 bitregion_end,
474 machine_mode fieldmode,
475 unsigned HOST_WIDE_INT *new_bitnum)
477 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
478 bitregion_end, MEM_ALIGN (op0),
479 MEM_VOLATILE_P (op0));
480 scalar_int_mode best_mode;
481 if (iter.next_mode (&best_mode))
483 /* We can use a memory in BEST_MODE. See whether this is true for
484 any wider modes. All other things being equal, we prefer to
485 use the widest mode possible because it tends to expose more
486 CSE opportunities. */
487 if (!iter.prefer_smaller_modes ())
489 /* Limit the search to the mode required by the corresponding
490 register insertion or extraction instruction, if any. */
491 scalar_int_mode limit_mode = word_mode;
492 extraction_insn insn;
493 if (get_best_reg_extraction_insn (&insn, pattern,
494 GET_MODE_BITSIZE (best_mode),
495 fieldmode))
496 limit_mode = insn.field_mode;
498 scalar_int_mode wider_mode;
499 while (iter.next_mode (&wider_mode)
500 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
501 best_mode = wider_mode;
503 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
504 new_bitnum);
506 return NULL_RTX;
509 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
510 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
511 offset is then BITNUM / BITS_PER_UNIT. */
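/* Illustrative example (editor's note): on a little-endian target whose
   REGMODE_NATURAL_SIZE is 4 bytes, a field is a lowpart only if BITNUM is
   a multiple of 32; on a big-endian target the field must instead start on
   a byte boundary and end on such a 32-bit boundary (or at the end of the
   structure).  */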
513 static bool
514 lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
515 machine_mode struct_mode)
517 poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
518 if (BYTES_BIG_ENDIAN)
519 return (multiple_p (bitnum, BITS_PER_UNIT)
520 && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
521 || multiple_p (bitnum + bitsize,
522 regsize * BITS_PER_UNIT)));
523 else
524 return multiple_p (bitnum, regsize * BITS_PER_UNIT);
527 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
528 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
529 Return false if the access would touch memory outside the range
530 BITREGION_START to BITREGION_END for conformance to the C++ memory
531 model. */
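/* Worked example (editor's note): with a 32-bit FIELDMODE, a field of
   BITSIZE == 8 starting at BITNUM == 28 gives 28 % 32 + 8 == 36 > 32, so
   the access would have to be split and the function returns false.  */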
533 static bool
534 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
535 unsigned HOST_WIDE_INT bitnum,
536 scalar_int_mode fieldmode,
537 poly_uint64 bitregion_start,
538 poly_uint64 bitregion_end)
540 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
542 /* -fstrict-volatile-bitfields must be enabled and we must have a
543 volatile MEM. */
544 if (!MEM_P (op0)
545 || !MEM_VOLATILE_P (op0)
546 || flag_strict_volatile_bitfields <= 0)
547 return false;
549 /* The bit size must not be larger than the field mode, and
550 the field mode must not be larger than a word. */
551 if (bitsize > modesize || modesize > BITS_PER_WORD)
552 return false;
554 /* Check for cases of unaligned fields that must be split. */
555 if (bitnum % modesize + bitsize > modesize)
556 return false;
558 /* The memory must be sufficiently aligned for a MODESIZE access.
559 This condition guarantees that the memory access will not
560 touch anything after the end of the structure. */
561 if (MEM_ALIGN (op0) < modesize)
562 return false;
564 /* Check for cases where the C++ memory model applies. */
565 if (maybe_ne (bitregion_end, 0U)
566 && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
567 || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
568 bitregion_end)))
569 return false;
571 return true;
574 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
575 bit number BITNUM can be treated as a simple value of mode MODE.
576 Store the byte offset in *BYTENUM if so. */
578 static bool
579 simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
580 machine_mode mode, poly_uint64 *bytenum)
582 return (MEM_P (op0)
583 && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
584 && known_eq (bitsize, GET_MODE_BITSIZE (mode))
585 && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
586 || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
587 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
590 /* Try to use instruction INSV to store VALUE into a field of OP0.
591 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
592 BLKmode MEM. VALUE_MODE is the mode of VALUE. BITSIZE and BITNUM
593 are as for store_bit_field. */
595 static bool
596 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
597 opt_scalar_int_mode op0_mode,
598 unsigned HOST_WIDE_INT bitsize,
599 unsigned HOST_WIDE_INT bitnum,
600 rtx value, scalar_int_mode value_mode)
602 class expand_operand ops[4];
603 rtx value1;
604 rtx xop0 = op0;
605 rtx_insn *last = get_last_insn ();
606 bool copy_back = false;
608 scalar_int_mode op_mode = insv->field_mode;
609 unsigned int unit = GET_MODE_BITSIZE (op_mode);
610 if (bitsize == 0 || bitsize > unit)
611 return false;
613 if (MEM_P (xop0))
614 /* Get a reference to the first byte of the field. */
615 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
616 &bitnum);
617 else
619 /* Convert from counting within OP0 to counting in OP_MODE. */
620 if (BYTES_BIG_ENDIAN)
621 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
623 /* If xop0 is a register, we need it in OP_MODE
624 to make it acceptable to the format of insv. */
625 if (GET_CODE (xop0) == SUBREG)
626 /* We can't just change the mode, because this might clobber op0,
627 and we will need the original value of op0 if insv fails. */
628 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
629 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
630 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
633 /* If the destination is a paradoxical subreg such that we need a
634 truncate to the inner mode, perform the insertion on a temporary and
635 truncate the result to the original destination. Note that we can't
636 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
637 X) 0)) is (reg:N X). */
638 if (GET_CODE (xop0) == SUBREG
639 && REG_P (SUBREG_REG (xop0))
640 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
641 op_mode))
643 rtx tem = gen_reg_rtx (op_mode);
644 emit_move_insn (tem, xop0);
645 xop0 = tem;
646 copy_back = true;
649 /* There is a similar overflow check at the start of store_bit_field_1,
650 but it only handles the case where the field lies completely outside
651 the register.  The field can also lie partially in the register, and
652 BITSIZE needs to be adjusted for that partial overlap.  Without this
653 fix, pr48335-2.c breaks on big-endian targets that have a bit-insert
654 instruction, such as arm and aarch64. */
656 if (bitsize + bitnum > unit && bitnum < unit)
658 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
659 "destination object, data truncated into %wu-bit",
660 bitsize, unit - bitnum);
661 bitsize = unit - bitnum;
664 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
665 "backwards" from the size of the unit we are inserting into.
666 Otherwise, we count bits from the most significant on a
667 BYTES/BITS_BIG_ENDIAN machine. */
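/* Worked example (editor's note): when the two conventions differ and
   UNIT == 32, a field of BITSIZE == 8 at BITNUM == 4 is renumbered to
   32 - 8 - 4 == 20 before being handed to the insv pattern.  */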
669 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
670 bitnum = unit - bitsize - bitnum;
672 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
673 value1 = value;
674 if (value_mode != op_mode)
676 if (GET_MODE_BITSIZE (value_mode) >= bitsize)
678 rtx tmp;
679 /* Optimization: Don't bother really extending VALUE
680 if it has all the bits we will actually use. However,
681 if we must narrow it, be sure we do it correctly. */
683 if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
685 tmp = simplify_subreg (op_mode, value1, value_mode, 0);
686 if (! tmp)
687 tmp = simplify_gen_subreg (op_mode,
688 force_reg (value_mode, value1),
689 value_mode, 0);
691 else
693 tmp = gen_lowpart_if_possible (op_mode, value1);
694 if (! tmp)
695 tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
697 value1 = tmp;
699 else if (CONST_INT_P (value))
700 value1 = gen_int_mode (INTVAL (value), op_mode);
701 else
702 /* The parsing phase is supposed to make VALUE's data type
703 match that of the component reference, which is a type
704 at least as wide as the field; so VALUE should have
705 a mode that corresponds to that type. */
706 gcc_assert (CONSTANT_P (value));
709 create_fixed_operand (&ops[0], xop0);
710 create_integer_operand (&ops[1], bitsize);
711 create_integer_operand (&ops[2], bitnum);
712 create_input_operand (&ops[3], value1, op_mode);
713 if (maybe_expand_insn (insv->icode, 4, ops))
715 if (copy_back)
716 convert_move (op0, xop0, true);
717 return true;
719 delete_insns_since (last);
720 return false;
723 /* A subroutine of store_bit_field, with the same arguments. Return true
724 if the operation could be implemented.
726 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
727 no other way of implementing the operation. If FALLBACK_P is false,
728 return false instead. */
730 static bool
731 store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
732 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
733 machine_mode fieldmode,
734 rtx value, bool reverse, bool fallback_p)
736 rtx op0 = str_rtx;
738 while (GET_CODE (op0) == SUBREG)
740 bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
741 op0 = SUBREG_REG (op0);
744 /* No action is needed if the target is a register and if the field
745 lies completely outside that register. This can occur if the source
746 code contains an out-of-bounds access to a small array. */
747 if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
748 return true;
750 /* Use vec_set patterns for inserting parts of vectors whenever
751 available. */
752 machine_mode outermode = GET_MODE (op0);
753 scalar_mode innermode = GET_MODE_INNER (outermode);
754 poly_uint64 pos;
755 if (VECTOR_MODE_P (outermode)
756 && !MEM_P (op0)
757 && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
758 && fieldmode == innermode
759 && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
760 && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
762 class expand_operand ops[3];
763 enum insn_code icode = optab_handler (vec_set_optab, outermode);
765 create_fixed_operand (&ops[0], op0);
766 create_input_operand (&ops[1], value, innermode);
767 create_integer_operand (&ops[2], pos);
768 if (maybe_expand_insn (icode, 3, ops))
769 return true;
772 /* If the target is a register, overwriting the entire object, or storing
773 a full-word or multi-word field can be done with just a SUBREG. */
774 if (!MEM_P (op0)
775 && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
777 /* Use the subreg machinery either to narrow OP0 to the required
778 words or to cope with mode punning between equal-sized modes.
779 In the latter case, use subreg on the rhs side, not lhs. */
780 rtx sub;
781 HOST_WIDE_INT regnum;
782 poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
783 if (known_eq (bitnum, 0U)
784 && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
786 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
787 if (sub)
789 if (reverse)
790 sub = flip_storage_order (GET_MODE (op0), sub);
791 emit_move_insn (op0, sub);
792 return true;
795 else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum)
796 && multiple_p (bitsize, regsize * BITS_PER_UNIT))
798 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
799 regnum * regsize);
800 if (sub)
802 if (reverse)
803 value = flip_storage_order (fieldmode, value);
804 emit_move_insn (sub, value);
805 return true;
810 /* If the target is memory, storing any naturally aligned field can be
811 done with a simple store. For targets that support fast unaligned
812 memory, any naturally sized, unit aligned field can be done directly. */
813 poly_uint64 bytenum;
814 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
816 op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
817 if (reverse)
818 value = flip_storage_order (fieldmode, value);
819 emit_move_insn (op0, value);
820 return true;
823 /* It's possible we'll need to handle other cases here for
824 polynomial bitnum and bitsize. */
826 /* From here on we need to be looking at a fixed-size insertion. */
827 unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
828 unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
830 /* Make sure we are playing with integral modes. Pun with subregs
831 if we aren't. This must come after the entire register case above,
832 since that case is valid for any mode. The following cases are only
833 valid for integral modes. */
834 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
835 scalar_int_mode imode;
836 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
838 if (MEM_P (op0))
839 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
840 0, MEM_SIZE (op0));
841 else
842 op0 = gen_lowpart (op0_mode.require (), op0);
845 return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
846 bitregion_start, bitregion_end,
847 fieldmode, value, reverse, fallback_p);
850 /* Subroutine of store_bit_field_1, with the same arguments, except
851 that BITSIZE and BITNUM are constant. Handle cases specific to
852 integral modes. If OP0_MODE is defined, it is the mode of OP0,
853 otherwise OP0 is a BLKmode MEM. */
855 static bool
856 store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
857 unsigned HOST_WIDE_INT bitsize,
858 unsigned HOST_WIDE_INT bitnum,
859 poly_uint64 bitregion_start,
860 poly_uint64 bitregion_end,
861 machine_mode fieldmode,
862 rtx value, bool reverse, bool fallback_p)
864 /* Storing an lsb-aligned field in a register
865 can be done with a movstrict instruction. */
867 if (!MEM_P (op0)
868 && !reverse
869 && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
870 && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
871 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
873 class expand_operand ops[2];
874 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
875 rtx arg0 = op0;
876 unsigned HOST_WIDE_INT subreg_off;
878 if (GET_CODE (arg0) == SUBREG)
880 /* Else we've got some float mode source being extracted into
881 a different float mode destination -- this combination of
882 subregs results in Severe Tire Damage. */
883 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
884 || GET_MODE_CLASS (fieldmode) == MODE_INT
885 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
886 arg0 = SUBREG_REG (arg0);
889 subreg_off = bitnum / BITS_PER_UNIT;
890 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
892 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
894 create_fixed_operand (&ops[0], arg0);
895 /* Shrink the source operand to FIELDMODE. */
896 create_convert_operand_to (&ops[1], value, fieldmode, false);
897 if (maybe_expand_insn (icode, 2, ops))
898 return true;
902 /* Handle fields bigger than a word. */
904 if (bitsize > BITS_PER_WORD)
906 /* Here we transfer the words of the field
907 in the order least significant first.
908 This is because the most significant word is the one which may
909 be less than full.
910 However, only do that if the value is not BLKmode. */
912 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
913 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
914 unsigned int i;
915 rtx_insn *last;
917 /* This is the mode we must force value to, so that there will be enough
918 subwords to extract. Note that fieldmode will often (always?) be
919 VOIDmode, because that is what store_field uses to indicate that this
920 is a bit field, but passing VOIDmode to operand_subword_force
921 is not allowed.
923 The mode must be fixed-size, since insertions into variable-sized
924 objects are meant to be handled before calling this function. */
925 fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
926 if (value_mode == VOIDmode)
927 value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
929 last = get_last_insn ();
930 for (i = 0; i < nwords; i++)
932 /* If I is 0, use the low-order word in both field and target;
933 if I is 1, use the next to lowest word; and so on. */
934 unsigned int wordnum = (backwards
935 ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD
936 - i - 1
937 : i);
938 unsigned int bit_offset = (backwards ^ reverse
939 ? MAX ((int) bitsize - ((int) i + 1)
940 * BITS_PER_WORD,
942 : (int) i * BITS_PER_WORD);
943 rtx value_word = operand_subword_force (value, wordnum, value_mode);
944 unsigned HOST_WIDE_INT new_bitsize =
945 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
947 /* If the remaining chunk is not a full word, we have to make sure
948 that for big-endian machines the higher-order bits are used. */
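/* Worked example (editor's note, assuming 32-bit words): if only 20 bits
   remain to be stored, the source word is shifted right by 32 - 20 == 12
   so that its 20 most significant bits end up as the low 20 bits consumed
   by the recursive store_bit_field_1 call below.  */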
950 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
952 int shift = BITS_PER_WORD - new_bitsize;
953 rtx shift_rtx = gen_int_shift_amount (word_mode, shift);
954 value_word = simplify_expand_binop (word_mode, lshr_optab,
955 value_word, shift_rtx,
956 NULL_RTX, true,
957 OPTAB_LIB_WIDEN);
960 if (!store_bit_field_1 (op0, new_bitsize,
961 bitnum + bit_offset,
962 bitregion_start, bitregion_end,
963 word_mode,
964 value_word, reverse, fallback_p))
966 delete_insns_since (last);
967 return false;
970 return true;
973 /* If VALUE has a floating-point or complex mode, access it as an
974 integer of the corresponding size. This can occur on a machine
975 with 64 bit registers that uses SFmode for float. It can also
976 occur for unaligned float or complex fields. */
977 rtx orig_value = value;
978 scalar_int_mode value_mode;
979 if (GET_MODE (value) == VOIDmode)
980 /* By this point we've dealt with values that are bigger than a word,
981 so word_mode is a conservatively correct choice. */
982 value_mode = word_mode;
983 else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
985 value_mode = int_mode_for_mode (GET_MODE (value)).require ();
986 value = gen_reg_rtx (value_mode);
987 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
990 /* If OP0 is a multi-word register, narrow it to the affected word.
991 If the region spans two words, defer to store_split_bit_field.
992 Don't do this if op0 is a single hard register wider than word
993 such as a float or vector register. */
994 if (!MEM_P (op0)
995 && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
996 && (!REG_P (op0)
997 || !HARD_REGISTER_P (op0)
998 || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1000 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1002 if (!fallback_p)
1003 return false;
1005 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1006 bitregion_start, bitregion_end,
1007 value, value_mode, reverse);
1008 return true;
1010 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1011 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1012 gcc_assert (op0);
1013 op0_mode = word_mode;
1014 bitnum %= BITS_PER_WORD;
1017 /* From here on we can assume that the field to be stored in fits
1018 within a word. If the destination is a register, it too fits
1019 in a word. */
1021 extraction_insn insv;
1022 if (!MEM_P (op0)
1023 && !reverse
1024 && get_best_reg_extraction_insn (&insv, EP_insv,
1025 GET_MODE_BITSIZE (op0_mode.require ()),
1026 fieldmode)
1027 && store_bit_field_using_insv (&insv, op0, op0_mode,
1028 bitsize, bitnum, value, value_mode))
1029 return true;
1031 /* If OP0 is a memory, try copying it to a register and seeing if a
1032 cheap register alternative is available. */
1033 if (MEM_P (op0) && !reverse)
1035 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1036 fieldmode)
1037 && store_bit_field_using_insv (&insv, op0, op0_mode,
1038 bitsize, bitnum, value, value_mode))
1039 return true;
1041 rtx_insn *last = get_last_insn ();
1043 /* Try loading part of OP0 into a register, inserting the bitfield
1044 into that, and then copying the result back to OP0. */
1045 unsigned HOST_WIDE_INT bitpos;
1046 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1047 bitregion_start, bitregion_end,
1048 fieldmode, &bitpos);
1049 if (xop0)
1051 rtx tempreg = copy_to_reg (xop0);
1052 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1053 bitregion_start, bitregion_end,
1054 fieldmode, orig_value, reverse, false))
1056 emit_move_insn (xop0, tempreg);
1057 return true;
1059 delete_insns_since (last);
1063 if (!fallback_p)
1064 return false;
1066 store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1067 bitregion_end, value, value_mode, reverse);
1068 return true;
1071 /* Generate code to store value from rtx VALUE
1072 into a bit-field within structure STR_RTX
1073 containing BITSIZE bits starting at bit BITNUM.
1075 BITREGION_START is bitpos of the first bitfield in this region.
1076 BITREGION_END is the bitpos of the ending bitfield in this region.
1077 These two fields are 0, if the C++ memory model does not apply,
1078 or we are not interested in keeping track of bitfield regions.
1080 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1082 If REVERSE is true, the store is to be done in reverse order. */
1084 void
1085 store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1086 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1087 machine_mode fieldmode,
1088 rtx value, bool reverse)
1090 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1091 unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1092 scalar_int_mode int_mode;
1093 if (bitsize.is_constant (&ibitsize)
1094 && bitnum.is_constant (&ibitnum)
1095 && is_a <scalar_int_mode> (fieldmode, &int_mode)
1096 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1097 bitregion_start, bitregion_end))
1099 /* Storing a full word can be done with a simple store.
1100 We know here that the field can be accessed with one single
1101 instruction. For targets that support unaligned memory,
1102 an unaligned access may be necessary. */
1103 if (ibitsize == GET_MODE_BITSIZE (int_mode))
1105 str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1106 ibitnum / BITS_PER_UNIT);
1107 if (reverse)
1108 value = flip_storage_order (int_mode, value);
1109 gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1110 emit_move_insn (str_rtx, value);
1112 else
1114 rtx temp;
1116 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1117 ibitnum, &ibitnum);
1118 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1119 temp = copy_to_reg (str_rtx);
1120 if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1121 int_mode, value, reverse, true))
1122 gcc_unreachable ();
1124 emit_move_insn (str_rtx, temp);
1127 return;
1130 /* Under the C++0x memory model, we must not touch bits outside the
1131 bit region. Adjust the address to start at the beginning of the
1132 bit region. */
1133 if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1135 scalar_int_mode best_mode;
1136 machine_mode addr_mode = VOIDmode;
1138 poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1139 bitnum -= bitregion_start;
1140 poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1141 bitregion_end -= bitregion_start;
1142 bitregion_start = 0;
1143 if (bitsize.is_constant (&ibitsize)
1144 && bitnum.is_constant (&ibitnum)
1145 && get_best_mode (ibitsize, ibitnum,
1146 bitregion_start, bitregion_end,
1147 MEM_ALIGN (str_rtx), INT_MAX,
1148 MEM_VOLATILE_P (str_rtx), &best_mode))
1149 addr_mode = best_mode;
1150 str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1151 offset, size);
1154 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1155 bitregion_start, bitregion_end,
1156 fieldmode, value, reverse, true))
1157 gcc_unreachable ();
1160 /* Use shifts and boolean operations to store VALUE into a bit field of
1161 width BITSIZE in OP0, starting at bit BITNUM. If OP0_MODE is defined,
1162 it is the mode of OP0, otherwise OP0 is a BLKmode MEM. VALUE_MODE is
1163 the mode of VALUE.
1165 If REVERSE is true, the store is to be done in reverse order. */
1167 static void
1168 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1169 unsigned HOST_WIDE_INT bitsize,
1170 unsigned HOST_WIDE_INT bitnum,
1171 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1172 rtx value, scalar_int_mode value_mode, bool reverse)
1174 /* There is a case not handled here:
1175 a structure with a known alignment of just a halfword
1176 and a field split across two aligned halfwords within the structure.
1177 Or likewise a structure with a known alignment of just a byte
1178 and a field split across two bytes.
1179 Such cases are not supposed to be able to occur. */
1181 scalar_int_mode best_mode;
1182 if (MEM_P (op0))
1184 unsigned int max_bitsize = BITS_PER_WORD;
1185 scalar_int_mode imode;
1186 if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1187 max_bitsize = GET_MODE_BITSIZE (imode);
1189 if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1190 MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1191 &best_mode))
1193 /* The only way this should occur is if the field spans word
1194 boundaries. */
1195 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1196 bitregion_start, bitregion_end,
1197 value, value_mode, reverse);
1198 return;
1201 op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1203 else
1204 best_mode = op0_mode.require ();
1206 store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1207 value, value_mode, reverse);
1210 /* Helper function for store_fixed_bit_field, stores
1211 the bit field always using MODE, which is the mode of OP0. The other
1212 arguments are as for store_fixed_bit_field. */
1214 static void
1215 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1216 unsigned HOST_WIDE_INT bitsize,
1217 unsigned HOST_WIDE_INT bitnum,
1218 rtx value, scalar_int_mode value_mode, bool reverse)
1220 rtx temp;
1221 int all_zero = 0;
1222 int all_one = 0;
1224 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1225 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1227 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1228 /* BITNUM is the distance between our msb
1229 and that of the containing datum.
1230 Convert it to the distance from the lsb. */
1231 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1233 /* Now BITNUM is always the distance between our lsb
1234 and that of OP0. */
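/* Worked example (editor's note): in a 64-bit MODE on a big-endian target,
   a field of BITSIZE == 8 whose msb-relative position was 16 ends up with
   BITNUM == 64 - 8 - 16 == 40 bits above the lsb of OP0.  */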
1236 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1237 we must first convert its mode to MODE. */
1239 if (CONST_INT_P (value))
1241 unsigned HOST_WIDE_INT v = UINTVAL (value);
1243 if (bitsize < HOST_BITS_PER_WIDE_INT)
1244 v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1246 if (v == 0)
1247 all_zero = 1;
1248 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1249 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1250 || (bitsize == HOST_BITS_PER_WIDE_INT
1251 && v == HOST_WIDE_INT_M1U))
1252 all_one = 1;
1254 value = lshift_value (mode, v, bitnum);
1256 else
1258 int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1259 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1261 if (value_mode != mode)
1262 value = convert_to_mode (mode, value, 1);
1264 if (must_and)
1265 value = expand_binop (mode, and_optab, value,
1266 mask_rtx (mode, 0, bitsize, 0),
1267 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1268 if (bitnum > 0)
1269 value = expand_shift (LSHIFT_EXPR, mode, value,
1270 bitnum, NULL_RTX, 1);
1273 if (reverse)
1274 value = flip_storage_order (mode, value);
1276 /* Now clear the chosen bits in OP0,
1277 except that if VALUE is -1 we need not bother. */
1278 /* We keep the intermediates in registers to allow CSE to combine
1279 consecutive bitfield assignments. */
1281 temp = force_reg (mode, op0);
1283 if (! all_one)
1285 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1286 if (reverse)
1287 mask = flip_storage_order (mode, mask);
1288 temp = expand_binop (mode, and_optab, temp, mask,
1289 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1290 temp = force_reg (mode, temp);
1293 /* Now logical-or VALUE into OP0, unless it is zero. */
1295 if (! all_zero)
1297 temp = expand_binop (mode, ior_optab, temp, value,
1298 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1299 temp = force_reg (mode, temp);
1302 if (op0 != temp)
1304 op0 = copy_rtx (op0);
1305 emit_move_insn (op0, temp);
1309 /* Store a bit field that is split across multiple accessible memory objects.
1311 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1312 BITSIZE is the field width; BITPOS the position of its first bit
1313 (within the word).
1314 VALUE is the value to store, which has mode VALUE_MODE.
1315 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1316 a BLKmode MEM.
1318 If REVERSE is true, the store is to be done in reverse order.
1320 This does not yet handle fields wider than BITS_PER_WORD. */
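/* Worked example (editor's note, assuming UNIT == 32): a 6-bit field at
   BITPOS == 30 is stored by the loop below in two pieces: 2 bits at
   position 30 of the first unit, then the remaining 4 bits at position 0
   of the second unit.  */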
1322 static void
1323 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1324 unsigned HOST_WIDE_INT bitsize,
1325 unsigned HOST_WIDE_INT bitpos,
1326 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1327 rtx value, scalar_int_mode value_mode, bool reverse)
1329 unsigned int unit, total_bits, bitsdone = 0;
1331 /* Make sure UNIT isn't larger than BITS_PER_WORD, since we can only
1332 handle that much at a time. */
1333 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1334 unit = BITS_PER_WORD;
1335 else
1336 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1338 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1339 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1340 again, and we will mutually recurse forever. */
1341 if (MEM_P (op0) && op0_mode.exists ())
1342 unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1344 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1345 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1346 that VALUE might be a floating-point constant. */
1347 if (CONSTANT_P (value) && !CONST_INT_P (value))
1349 rtx word = gen_lowpart_common (word_mode, value);
1351 if (word && (value != word))
1352 value = word;
1353 else
1354 value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1355 value_mode = word_mode;
1358 total_bits = GET_MODE_BITSIZE (value_mode);
1360 while (bitsdone < bitsize)
1362 unsigned HOST_WIDE_INT thissize;
1363 unsigned HOST_WIDE_INT thispos;
1364 unsigned HOST_WIDE_INT offset;
1365 rtx part;
1367 offset = (bitpos + bitsdone) / unit;
1368 thispos = (bitpos + bitsdone) % unit;
1370 /* When the region of bytes we can touch is restricted, decrease
1371 UNIT close to the end of the region as needed. If op0 is a REG
1372 or SUBREG of REG, don't do this, as there can't be data races
1373 on a register and we can expand shorter code in some cases. */
1374 if (maybe_ne (bitregion_end, 0U)
1375 && unit > BITS_PER_UNIT
1376 && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1377 && !REG_P (op0)
1378 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1380 unit = unit / 2;
1381 continue;
1384 /* THISSIZE must not overrun a word boundary. Otherwise,
1385 store_fixed_bit_field will call us again, and we will mutually
1386 recurse forever. */
1387 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1388 thissize = MIN (thissize, unit - thispos);
1390 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1392 /* Fetch successively less significant portions. */
1393 if (CONST_INT_P (value))
1394 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1395 >> (bitsize - bitsdone - thissize))
1396 & ((HOST_WIDE_INT_1 << thissize) - 1));
1397 /* Likewise, but the source is little-endian. */
1398 else if (reverse)
1399 part = extract_fixed_bit_field (word_mode, value, value_mode,
1400 thissize,
1401 bitsize - bitsdone - thissize,
1402 NULL_RTX, 1, false);
1403 else
1404 /* The args are chosen so that the last part includes the
1405 lsb. Give extract_bit_field the value it needs (with
1406 endianness compensation) to fetch the piece we want. */
1407 part = extract_fixed_bit_field (word_mode, value, value_mode,
1408 thissize,
1409 total_bits - bitsize + bitsdone,
1410 NULL_RTX, 1, false);
1412 else
1414 /* Fetch successively more significant portions. */
1415 if (CONST_INT_P (value))
1416 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1417 >> bitsdone)
1418 & ((HOST_WIDE_INT_1 << thissize) - 1));
1419 /* Likewise, but the source is big-endian. */
1420 else if (reverse)
1421 part = extract_fixed_bit_field (word_mode, value, value_mode,
1422 thissize,
1423 total_bits - bitsdone - thissize,
1424 NULL_RTX, 1, false);
1425 else
1426 part = extract_fixed_bit_field (word_mode, value, value_mode,
1427 thissize, bitsdone, NULL_RTX,
1428 1, false);
1431 /* If OP0 is a register, then handle OFFSET here. */
1432 rtx op0_piece = op0;
1433 opt_scalar_int_mode op0_piece_mode = op0_mode;
1434 if (SUBREG_P (op0) || REG_P (op0))
1436 scalar_int_mode imode;
1437 if (op0_mode.exists (&imode)
1438 && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1440 if (offset)
1441 op0_piece = const0_rtx;
1443 else
1445 op0_piece = operand_subword_force (op0,
1446 offset * unit / BITS_PER_WORD,
1447 GET_MODE (op0));
1448 op0_piece_mode = word_mode;
1450 offset &= BITS_PER_WORD / unit - 1;
1453 /* OFFSET is in UNITs, and UNIT is in bits.  If OP0_PIECE is const0_rtx,
1454 it is just an out-of-bounds access.  Ignore it. */
1455 if (op0_piece != const0_rtx)
1456 store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1457 offset * unit + thispos, bitregion_start,
1458 bitregion_end, part, word_mode, reverse);
1459 bitsdone += thissize;
1463 /* A subroutine of extract_bit_field_1 that converts return value X
1464 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1465 to extract_bit_field. */
1467 static rtx
1468 convert_extracted_bit_field (rtx x, machine_mode mode,
1469 machine_mode tmode, bool unsignedp)
1471 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1472 return x;
1474 /* If the x mode is not a scalar integral, first convert to the
1475 integer mode of that size and then access it as a floating-point
1476 value via a SUBREG. */
1477 if (!SCALAR_INT_MODE_P (tmode))
1479 scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1480 x = convert_to_mode (int_mode, x, unsignedp);
1481 x = force_reg (int_mode, x);
1482 return gen_lowpart (tmode, x);
1485 return convert_to_mode (tmode, x, unsignedp);
1488 /* Try to use an ext(z)v pattern to extract a field from OP0.
1489 Return the extracted value on success, otherwise return null.
1490 EXTV describes the extraction instruction to use. If OP0_MODE
1491 is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1492 The other arguments are as for extract_bit_field. */
1494 static rtx
1495 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1496 opt_scalar_int_mode op0_mode,
1497 unsigned HOST_WIDE_INT bitsize,
1498 unsigned HOST_WIDE_INT bitnum,
1499 int unsignedp, rtx target,
1500 machine_mode mode, machine_mode tmode)
1502 class expand_operand ops[4];
1503 rtx spec_target = target;
1504 rtx spec_target_subreg = 0;
1505 scalar_int_mode ext_mode = extv->field_mode;
1506 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1508 if (bitsize == 0 || unit < bitsize)
1509 return NULL_RTX;
1511 if (MEM_P (op0))
1512 /* Get a reference to the first byte of the field. */
1513 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1514 &bitnum);
1515 else
1517 /* Convert from counting within OP0 to counting in EXT_MODE. */
1518 if (BYTES_BIG_ENDIAN)
1519 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1521 /* If op0 is a register, we need it in EXT_MODE to make it
1522 acceptable to the format of ext(z)v. */
1523 if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1524 return NULL_RTX;
1525 if (REG_P (op0) && op0_mode.require () != ext_mode)
1526 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1529 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1530 "backwards" from the size of the unit we are extracting from.
1531 Otherwise, we count bits from the most significant on a
1532 BYTES/BITS_BIG_ENDIAN machine. */
1534 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1535 bitnum = unit - bitsize - bitnum;
1537 if (target == 0)
1538 target = spec_target = gen_reg_rtx (tmode);
1540 if (GET_MODE (target) != ext_mode)
1542 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1543 between the mode of the extraction (word_mode) and the target
1544 mode. Instead, create a temporary and use convert_move to set
1545 the target. */
1546 if (REG_P (target)
1547 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1549 target = gen_lowpart (ext_mode, target);
1550 if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1551 spec_target_subreg = target;
1553 else
1554 target = gen_reg_rtx (ext_mode);
1557 create_output_operand (&ops[0], target, ext_mode);
1558 create_fixed_operand (&ops[1], op0);
1559 create_integer_operand (&ops[2], bitsize);
1560 create_integer_operand (&ops[3], bitnum);
1561 if (maybe_expand_insn (extv->icode, 4, ops))
1563 target = ops[0].value;
1564 if (target == spec_target)
1565 return target;
1566 if (target == spec_target_subreg)
1567 return spec_target;
1568 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1570 return NULL_RTX;
1573 /* See whether it would be valid to extract the part of OP0 described
1574 by BITNUM and BITSIZE into a value of mode MODE using a subreg
1575 operation. Return the subreg if so, otherwise return null. */
1577 static rtx
1578 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1579 poly_uint64 bitsize, poly_uint64 bitnum)
1581 poly_uint64 bytenum;
1582 if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1583 && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1584 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1585 && TRULY_NOOP_TRUNCATION_MODES_P (mode, GET_MODE (op0)))
1586 return simplify_gen_subreg (mode, op0, GET_MODE (op0), bytenum);
1587 return NULL_RTX;
1590 /* A subroutine of extract_bit_field, with the same arguments.
1591 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1592 if we can find no other means of implementing the operation.
1593 If FALLBACK_P is false, return NULL instead. */
1595 static rtx
1596 extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1597 int unsignedp, rtx target, machine_mode mode,
1598 machine_mode tmode, bool reverse, bool fallback_p,
1599 rtx *alt_rtl)
1601 rtx op0 = str_rtx;
1602 machine_mode mode1;
1604 if (tmode == VOIDmode)
1605 tmode = mode;
1607 while (GET_CODE (op0) == SUBREG)
1609 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1610 op0 = SUBREG_REG (op0);
1613 /* If we have an out-of-bounds access to a register, just return an
1614 uninitialized register of the required mode. This can occur if the
1615 source code contains an out-of-bounds access to a small array. */
1616 if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1617 return gen_reg_rtx (tmode);
1619 if (REG_P (op0)
1620 && mode == GET_MODE (op0)
1621 && known_eq (bitnum, 0U)
1622 && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1624 if (reverse)
1625 op0 = flip_storage_order (mode, op0);
1626 /* We're trying to extract a full register from itself. */
1627 return op0;
1630 /* First try to check for vector from vector extractions. */
1631 if (VECTOR_MODE_P (GET_MODE (op0))
1632 && !MEM_P (op0)
1633 && VECTOR_MODE_P (tmode)
1634 && known_eq (bitsize, GET_MODE_BITSIZE (tmode))
1635 && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1637 machine_mode new_mode = GET_MODE (op0);
1638 if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1640 scalar_mode inner_mode = GET_MODE_INNER (tmode);
1641 poly_uint64 nunits;
1642 if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1643 GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1644 || !mode_for_vector (inner_mode, nunits).exists (&new_mode)
1645 || !VECTOR_MODE_P (new_mode)
1646 || maybe_ne (GET_MODE_SIZE (new_mode),
1647 GET_MODE_SIZE (GET_MODE (op0)))
1648 || GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)
1649 || !targetm.vector_mode_supported_p (new_mode))
1650 new_mode = VOIDmode;
1652 poly_uint64 pos;
1653 if (new_mode != VOIDmode
1654 && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1655 != CODE_FOR_nothing)
1656 && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1658 class expand_operand ops[3];
1659 machine_mode outermode = new_mode;
1660 machine_mode innermode = tmode;
1661 enum insn_code icode
1662 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1664 if (new_mode != GET_MODE (op0))
1665 op0 = gen_lowpart (new_mode, op0);
1666 create_output_operand (&ops[0], target, innermode);
1667 ops[0].target = 1;
1668 create_input_operand (&ops[1], op0, outermode);
1669 create_integer_operand (&ops[2], pos);
1670 if (maybe_expand_insn (icode, 3, ops))
1672 if (alt_rtl && ops[0].target)
1673 *alt_rtl = target;
1674 target = ops[0].value;
1675 if (GET_MODE (target) != mode)
1676 return gen_lowpart (tmode, target);
1677 return target;
1682 /* See if we can get a better vector mode before extracting. */
1683 if (VECTOR_MODE_P (GET_MODE (op0))
1684 && !MEM_P (op0)
1685 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1687 machine_mode new_mode;
1689 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1690 new_mode = MIN_MODE_VECTOR_FLOAT;
1691 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1692 new_mode = MIN_MODE_VECTOR_FRACT;
1693 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1694 new_mode = MIN_MODE_VECTOR_UFRACT;
1695 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1696 new_mode = MIN_MODE_VECTOR_ACCUM;
1697 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1698 new_mode = MIN_MODE_VECTOR_UACCUM;
1699 else
1700 new_mode = MIN_MODE_VECTOR_INT;
1702 FOR_EACH_MODE_FROM (new_mode, new_mode)
1703 if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1704 && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1705 && targetm.vector_mode_supported_p (new_mode))
1706 break;
1707 if (new_mode != VOIDmode)
1708 op0 = gen_lowpart (new_mode, op0);
1711 /* Use vec_extract patterns for extracting parts of vectors whenever
1712 available. If that fails, see whether the current modes and bitregion
1713 give a natural subreg. */
1714 machine_mode outermode = GET_MODE (op0);
1715 if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1717 scalar_mode innermode = GET_MODE_INNER (outermode);
1718 enum insn_code icode
1719 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1720 poly_uint64 pos;
1721 if (icode != CODE_FOR_nothing
1722 && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
1723 && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
1725 class expand_operand ops[3];
1727 create_output_operand (&ops[0], target, innermode);
1728 ops[0].target = 1;
1729 create_input_operand (&ops[1], op0, outermode);
1730 create_integer_operand (&ops[2], pos);
1731 if (maybe_expand_insn (icode, 3, ops))
1733 if (alt_rtl && ops[0].target)
1734 *alt_rtl = target;
1735 target = ops[0].value;
1736 if (GET_MODE (target) != mode)
1737 return gen_lowpart (tmode, target);
1738 return target;
1741 /* Using subregs is useful if we're extracting one register vector
1742 from a multi-register vector. extract_bit_field_as_subreg checks
1743 for valid bitsize and bitnum, so we don't need to do that here. */
1744 if (VECTOR_MODE_P (mode))
1746 rtx sub = extract_bit_field_as_subreg (mode, op0, bitsize, bitnum);
1747 if (sub)
1748 return sub;
1752 /* Make sure we are playing with integral modes. Pun with subregs
1753 if we aren't. */
1754 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1755 scalar_int_mode imode;
1756 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1758 if (MEM_P (op0))
1759 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1760 0, MEM_SIZE (op0));
1761 else if (op0_mode.exists (&imode))
1763 op0 = gen_lowpart (imode, op0);
1765 /* If we got a SUBREG, force it into a register since we
1766 aren't going to be able to do another SUBREG on it. */
1767 if (GET_CODE (op0) == SUBREG)
1768 op0 = force_reg (imode, op0);
1770 else
1772 poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1773 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1774 emit_move_insn (mem, op0);
1775 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1779 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1780 If that's wrong, the solution is to test for it and set TARGET to 0
1781 if needed. */
1783 /* Get the mode of the field to use for atomic access or subreg
1784 conversion. */
1785 if (!SCALAR_INT_MODE_P (tmode)
1786 || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1787 mode1 = mode;
1788 gcc_assert (mode1 != BLKmode);
1790 /* Extraction of a full MODE1 value can be done with a subreg as long
1791 as the least significant bit of the value is the least significant
1792 bit of either OP0 or a word of OP0. */
1793 if (!MEM_P (op0) && !reverse)
1795 rtx sub = extract_bit_field_as_subreg (mode1, op0, bitsize, bitnum);
1796 if (sub)
1797 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1800 /* Extraction of a full MODE1 value can be done with a load as long as
1801 the field is on a byte boundary and is sufficiently aligned. */
1802 poly_uint64 bytenum;
1803 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1805 op0 = adjust_bitfield_address (op0, mode1, bytenum);
1806 if (reverse)
1807 op0 = flip_storage_order (mode1, op0);
1808 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1811 /* If we have a memory source and a non-constant bit offset, restrict
1812 the memory to the referenced bytes. This is a worst-case fallback
1813 but is useful for things like vector booleans. */
1814 if (MEM_P (op0) && !bitnum.is_constant ())
1816 bytenum = bits_to_bytes_round_down (bitnum);
1817 bitnum = num_trailing_bits (bitnum);
1818 poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1819 op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1820 op0_mode = opt_scalar_int_mode ();
1823 /* It's possible we'll need to handle other cases here for
1824 polynomial bitnum and bitsize. */
1826 /* From here on we need to be looking at a fixed-size extraction. */
1827 return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1828 bitnum.to_constant (), unsignedp,
1829 target, mode, tmode, reverse, fallback_p);
1832 /* Subroutine of extract_bit_field_1, with the same arguments, except
1833 that BITSIZE and BITNUM are constant. Handle cases specific to
1834 integral modes. If OP0_MODE is defined, it is the mode of OP0,
1835 otherwise OP0 is a BLKmode MEM. */
1837 static rtx
1838 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1839 unsigned HOST_WIDE_INT bitsize,
1840 unsigned HOST_WIDE_INT bitnum, int unsignedp,
1841 rtx target, machine_mode mode, machine_mode tmode,
1842 bool reverse, bool fallback_p)
1844 /* Handle fields bigger than a word. */
1846 if (bitsize > BITS_PER_WORD)
1848 /* Here we transfer the words of the field
1849 in the order least significant first.
1850 This is because the most significant word is the one which may
1851 be less than full. */
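/* For instance, extracting a 48-bit field with 32-bit words needs
   nwords == 2 passes: the low 32 bits are fetched first and the
   partial (16-bit) most significant word last.  */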
1853 const bool backwards = WORDS_BIG_ENDIAN;
1854 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1855 unsigned int i;
1856 rtx_insn *last;
1858 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1859 target = gen_reg_rtx (mode);
1861 /* In case we're about to clobber a base register or something
1862 (see gcc.c-torture/execute/20040625-1.c). */
1863 if (reg_mentioned_p (target, op0))
1864 target = gen_reg_rtx (mode);
1866 /* Indicate for flow that the entire target reg is being set. */
1867 emit_clobber (target);
1869 /* The mode must be fixed-size, since extract_bit_field_1 handles
1870 extractions from variable-sized objects before calling this
1871 function. */
1872 unsigned int target_size
1873 = GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1874 last = get_last_insn ();
1875 for (i = 0; i < nwords; i++)
1877 /* If I is 0, use the low-order word in both field and target;
1878 if I is 1, use the next to lowest word; and so on. */
1879 /* Word number in TARGET to use. */
1880 unsigned int wordnum
1881 = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1882 /* Offset from start of field in OP0. */
1883 unsigned int bit_offset = (backwards ^ reverse
1884 ? MAX ((int) bitsize - ((int) i + 1)
1885 * BITS_PER_WORD,
1886 0)
1887 : (int) i * BITS_PER_WORD);
1888 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1889 rtx result_part
1890 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1891 bitsize - i * BITS_PER_WORD),
1892 bitnum + bit_offset, 1, target_part,
1893 mode, word_mode, reverse, fallback_p, NULL);
1895 gcc_assert (target_part);
1896 if (!result_part)
1898 delete_insns_since (last);
1899 return NULL;
1902 if (result_part != target_part)
1903 emit_move_insn (target_part, result_part);
1906 if (unsignedp)
1908 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1909 need to be zero'd out. */
1910 if (target_size > nwords * UNITS_PER_WORD)
1912 unsigned int i, total_words;
1914 total_words = target_size / UNITS_PER_WORD;
1915 for (i = nwords; i < total_words; i++)
1916 emit_move_insn
1917 (operand_subword (target,
1918 backwards ? total_words - i - 1 : i,
1919 1, VOIDmode),
1920 const0_rtx);
1922 return target;
1925 /* Signed bit field: sign-extend with two arithmetic shifts. */
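/* E.g. for a 32-bit MODE and an 8-bit field already sitting in the low
   bits, the pair of shifts is (v << 24) >> 24 with an arithmetic right
   shift, so the pattern 0xf0 becomes 0xfffffff0.  */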
1926 target = expand_shift (LSHIFT_EXPR, mode, target,
1927 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1928 return expand_shift (RSHIFT_EXPR, mode, target,
1929 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1932 /* If OP0 is a multi-word register, narrow it to the affected word.
1933 If the region spans two words, defer to extract_split_bit_field. */
1934 if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1936 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1938 if (!fallback_p)
1939 return NULL_RTX;
1940 target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1941 unsignedp, reverse);
1942 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1944 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1945 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1946 op0_mode = word_mode;
1947 bitnum %= BITS_PER_WORD;
1950 /* From here on we know the desired field is smaller than a word.
1951 If OP0 is a register, it too fits within a word. */
1952 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1953 extraction_insn extv;
1954 if (!MEM_P (op0)
1955 && !reverse
1956 /* ??? We could limit the structure size to the part of OP0 that
1957 contains the field, with appropriate checks for endianness
1958 and TARGET_TRULY_NOOP_TRUNCATION. */
1959 && get_best_reg_extraction_insn (&extv, pattern,
1960 GET_MODE_BITSIZE (op0_mode.require ()),
1961 tmode))
1963 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1964 bitsize, bitnum,
1965 unsignedp, target, mode,
1966 tmode);
1967 if (result)
1968 return result;
1971 /* If OP0 is a memory, try copying it to a register and seeing if a
1972 cheap register alternative is available. */
1973 if (MEM_P (op0) && !reverse)
1975 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1976 tmode))
1978 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1979 bitsize, bitnum,
1980 unsignedp, target, mode,
1981 tmode);
1982 if (result)
1983 return result;
1986 rtx_insn *last = get_last_insn ();
1988 /* Try loading part of OP0 into a register and extracting the
1989 bitfield from that. */
1990 unsigned HOST_WIDE_INT bitpos;
1991 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1992 0, 0, tmode, &bitpos);
1993 if (xop0)
1995 xop0 = copy_to_reg (xop0);
1996 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1997 unsignedp, target,
1998 mode, tmode, reverse, false, NULL);
1999 if (result)
2000 return result;
2001 delete_insns_since (last);
2005 if (!fallback_p)
2006 return NULL;
2008 /* Find a correspondingly-sized integer field, so we can apply
2009 shifts and masks to it. */
2010 scalar_int_mode int_mode;
2011 if (!int_mode_for_mode (tmode).exists (&int_mode))
2012 /* If this fails, we should probably push op0 out to memory and then
2013 do a load. */
2014 int_mode = int_mode_for_mode (mode).require ();
2016 target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2017 bitnum, target, unsignedp, reverse);
2019 /* Complex values must be reversed piecewise, so we need to undo the global
2020 reversal, convert to the complex mode and reverse again. */
2021 if (reverse && COMPLEX_MODE_P (tmode))
2023 target = flip_storage_order (int_mode, target);
2024 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2025 target = flip_storage_order (tmode, target);
2027 else
2028 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2030 return target;
2033 /* Generate code to extract a bit-field from STR_RTX
2034 containing BITSIZE bits, starting at BITNUM,
2035 and put it in TARGET if possible (if TARGET is nonzero).
2036 Regardless of TARGET, we return the rtx for where the value is placed.
2038 STR_RTX is the structure containing the byte (a REG or MEM).
2039 UNSIGNEDP is nonzero if this is an unsigned bit field.
2040 MODE is the natural mode of the field value once extracted.
2041 TMODE is the mode the caller would like the value to have;
2042 but the value may be returned with type MODE instead.
2044 If REVERSE is true, the extraction is to be done in reverse order.
2046 If a TARGET is specified and we can store in it at no extra cost,
2047 we do so, and return TARGET.
2048 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2049 if they are equally easy.
2051 If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2052 then *ALT_RTL is set to TARGET (before legitimization). */
2055 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2056 int unsignedp, rtx target, machine_mode mode,
2057 machine_mode tmode, bool reverse, rtx *alt_rtl)
2059 machine_mode mode1;
2061 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
2062 if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2063 mode1 = GET_MODE (str_rtx);
2064 else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2065 mode1 = GET_MODE (target);
2066 else
2067 mode1 = tmode;
2069 unsigned HOST_WIDE_INT ibitsize, ibitnum;
2070 scalar_int_mode int_mode;
2071 if (bitsize.is_constant (&ibitsize)
2072 && bitnum.is_constant (&ibitnum)
2073 && is_a <scalar_int_mode> (mode1, &int_mode)
2074 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2075 int_mode, 0, 0))
2077 /* Extraction of a full INT_MODE value can be done with a simple load.
2078 We know here that the field can be accessed with one single
2079 instruction. For targets that support unaligned memory,
2080 an unaligned access may be necessary. */
2081 if (ibitsize == GET_MODE_BITSIZE (int_mode))
2083 rtx result = adjust_bitfield_address (str_rtx, int_mode,
2084 ibitnum / BITS_PER_UNIT);
2085 if (reverse)
2086 result = flip_storage_order (int_mode, result);
2087 gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2088 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2091 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2092 &ibitnum);
2093 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2094 str_rtx = copy_to_reg (str_rtx);
2095 return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2096 target, mode, tmode, reverse, true, alt_rtl);
2099 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2100 target, mode, tmode, reverse, true, alt_rtl);
2103 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2104 from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0,
2105 otherwise OP0 is a BLKmode MEM.
2107 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2108 If REVERSE is true, the extraction is to be done in reverse order.
2110 If TARGET is nonzero, attempts to store the value there
2111 and return TARGET, but this is not guaranteed.
2112 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
2114 static rtx
2115 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2116 opt_scalar_int_mode op0_mode,
2117 unsigned HOST_WIDE_INT bitsize,
2118 unsigned HOST_WIDE_INT bitnum, rtx target,
2119 int unsignedp, bool reverse)
2121 scalar_int_mode mode;
2122 if (MEM_P (op0))
2124 if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2125 BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2126 /* The only way this should occur is if the field spans word
2127 boundaries. */
2128 return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2129 unsignedp, reverse);
2131 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2133 else
2134 mode = op0_mode.require ();
2136 return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2137 target, unsignedp, reverse);
2140 /* Helper function for extract_fixed_bit_field, extracts
2141 the bit field always using MODE, which is the mode of OP0.
2142 The other arguments are as for extract_fixed_bit_field. */
2144 static rtx
2145 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2146 unsigned HOST_WIDE_INT bitsize,
2147 unsigned HOST_WIDE_INT bitnum, rtx target,
2148 int unsignedp, bool reverse)
2150 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2151 for invalid input, such as extract equivalent of f5 from
2152 gcc.dg/pr48335-2.c. */
2154 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2155 /* BITNUM is the distance between our msb and that of OP0.
2156 Convert it to the distance from the lsb. */
2157 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
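/* E.g. in a 32-bit mode, an 8-bit field whose msb-relative position is 4
   ends up at lsb-relative position 32 - 8 - 4 == 20.  */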
2159 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2160 We have reduced the big-endian case to the little-endian case. */
2161 if (reverse)
2162 op0 = flip_storage_order (mode, op0);
2164 if (unsignedp)
2166 if (bitnum)
2168 /* If the field does not already start at the lsb,
2169 shift it so it does. */
2170 /* Maybe propagate the target for the shift. */
2171 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2172 if (tmode != mode)
2173 subtarget = 0;
2174 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2176 /* Convert the value to the desired mode. TMODE must also be a
2177 scalar integer for this conversion to make sense, since we
2178 shouldn't reinterpret the bits. */
2179 scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2180 if (mode != new_mode)
2181 op0 = convert_to_mode (new_mode, op0, 1);
2183 /* Unless the msb of the field used to be the msb when we shifted,
2184 mask out the upper bits. */
2186 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2187 return expand_binop (new_mode, and_optab, op0,
2188 mask_rtx (new_mode, 0, bitsize, 0),
2189 target, 1, OPTAB_LIB_WIDEN);
2190 return op0;
2193 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2194 then arithmetic-shift its lsb to the lsb of the word. */
2195 op0 = force_reg (mode, op0);
2197 /* Find the narrowest integer mode that contains the field. */
2199 opt_scalar_int_mode mode_iter;
2200 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2201 if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2202 break;
2204 mode = mode_iter.require ();
2205 op0 = convert_to_mode (mode, op0, 0);
2207 if (mode != tmode)
2208 target = 0;
2210 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2212 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2213 /* Maybe propagate the target for the shift. */
2214 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2215 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2218 return expand_shift (RSHIFT_EXPR, mode, op0,
2219 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2222 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2223 VALUE << BITPOS. */
2225 static rtx
2226 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2227 int bitpos)
2229 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2232 /* Extract a bit field that is split across two words
2233 and return an RTX for the result.
2235 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2236 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2237 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2238 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2239 a BLKmode MEM.
2241 If REVERSE is true, the extraction is to be done in reverse order. */
2243 static rtx
2244 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2245 unsigned HOST_WIDE_INT bitsize,
2246 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2247 bool reverse)
2249 unsigned int unit;
2250 unsigned int bitsdone = 0;
2251 rtx result = NULL_RTX;
2252 int first = 1;
2254 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
2255 much at a time. */
2256 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2257 unit = BITS_PER_WORD;
2258 else
2259 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2261 while (bitsdone < bitsize)
2263 unsigned HOST_WIDE_INT thissize;
2264 rtx part;
2265 unsigned HOST_WIDE_INT thispos;
2266 unsigned HOST_WIDE_INT offset;
2268 offset = (bitpos + bitsdone) / unit;
2269 thispos = (bitpos + bitsdone) % unit;
2271 /* THISSIZE must not overrun a word boundary. Otherwise,
2272 extract_fixed_bit_field will call us again, and we will mutually
2273 recurse forever. */
2274 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2275 thissize = MIN (thissize, unit - thispos);
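/* For example, with BITSIZE == 20, BITPOS == 26 and UNIT == 32 the first
   iteration extracts only 32 - 26 == 6 bits; the remaining 14 bits are
   taken from the next word on the following iteration.  */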
2277 /* If OP0 is a register, then handle OFFSET here. */
2278 rtx op0_piece = op0;
2279 opt_scalar_int_mode op0_piece_mode = op0_mode;
2280 if (SUBREG_P (op0) || REG_P (op0))
2282 op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2283 op0_piece_mode = word_mode;
2284 offset = 0;
2287 /* Extract the parts in bit-counting order,
2288 whose meaning is determined by BYTES_PER_UNIT.
2289 OFFSET is in UNITs, and UNIT is in bits. */
2290 part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2291 thissize, offset * unit + thispos,
2292 0, 1, reverse);
2293 bitsdone += thissize;
2295 /* Shift this part into place for the result. */
2296 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2298 if (bitsize != bitsdone)
2299 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2300 bitsize - bitsdone, 0, 1);
2302 else
2304 if (bitsdone != thissize)
2305 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2306 bitsdone - thissize, 0, 1);
2309 if (first)
2310 result = part;
2311 else
2312 /* Combine the parts with bitwise or. This works
2313 because we extracted each part as an unsigned bit field. */
2314 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2315 OPTAB_LIB_WIDEN);
2317 first = 0;
2320 /* Unsigned bit field: we are done. */
2321 if (unsignedp)
2322 return result;
2323 /* Signed bit field: sign-extend with two arithmetic shifts. */
2324 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2325 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2326 return expand_shift (RSHIFT_EXPR, word_mode, result,
2327 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2330 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2331 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2332 MODE, fill the upper bits with zeros. Fail if the layout of either
2333 mode is unknown (as for CC modes) or if the extraction would involve
2334 unprofitable mode punning. Return the value on success, otherwise
2335 return null.
2337 This is different from gen_lowpart* in these respects:
2339 - the returned value must always be considered an rvalue
2341 - when MODE is wider than SRC_MODE, the extraction involves
2342 a zero extension
2344 - when MODE is smaller than SRC_MODE, the extraction involves
2345 a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2347 In other words, this routine performs a computation, whereas the
2348 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2349 operations. */
2352 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2354 scalar_int_mode int_mode, src_int_mode;
2356 if (mode == src_mode)
2357 return src;
2359 if (CONSTANT_P (src))
2361 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2362 fails, it will happily create (subreg (symbol_ref)) or similar
2363 invalid SUBREGs. */
2364 poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2365 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2366 if (ret)
2367 return ret;
2369 if (GET_MODE (src) == VOIDmode
2370 || !validate_subreg (mode, src_mode, src, byte))
2371 return NULL_RTX;
2373 src = force_reg (GET_MODE (src), src);
2374 return gen_rtx_SUBREG (mode, src, byte);
2377 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2378 return NULL_RTX;
2380 if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2381 && targetm.modes_tieable_p (mode, src_mode))
2383 rtx x = gen_lowpart_common (mode, src);
2384 if (x)
2385 return x;
2388 if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2389 || !int_mode_for_mode (mode).exists (&int_mode))
2390 return NULL_RTX;
2392 if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2393 return NULL_RTX;
2394 if (!targetm.modes_tieable_p (int_mode, mode))
2395 return NULL_RTX;
2397 src = gen_lowpart (src_int_mode, src);
2398 if (!validate_subreg (int_mode, src_int_mode, src,
2399 subreg_lowpart_offset (int_mode, src_int_mode)))
2400 return NULL_RTX;
2402 src = convert_modes (int_mode, src_int_mode, src, true);
2403 src = gen_lowpart (mode, src);
2404 return src;
2407 /* Add INC into TARGET. */
2409 void
2410 expand_inc (rtx target, rtx inc)
2412 rtx value = expand_binop (GET_MODE (target), add_optab,
2413 target, inc,
2414 target, 0, OPTAB_LIB_WIDEN);
2415 if (value != target)
2416 emit_move_insn (target, value);
2419 /* Subtract DEC from TARGET. */
2421 void
2422 expand_dec (rtx target, rtx dec)
2424 rtx value = expand_binop (GET_MODE (target), sub_optab,
2425 target, dec,
2426 target, 0, OPTAB_LIB_WIDEN);
2427 if (value != target)
2428 emit_move_insn (target, value);
2431 /* Output a shift instruction for expression code CODE,
2432 with SHIFTED being the rtx for the value to shift,
2433 and AMOUNT the rtx for the amount to shift by.
2434 Store the result in the rtx TARGET, if that is convenient.
2435 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2436 Return the rtx for where the value is.
2437 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2438 in which case 0 is returned. */
2440 static rtx
2441 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2442 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2444 rtx op1, temp = 0;
2445 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2446 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2447 optab lshift_optab = ashl_optab;
2448 optab rshift_arith_optab = ashr_optab;
2449 optab rshift_uns_optab = lshr_optab;
2450 optab lrotate_optab = rotl_optab;
2451 optab rrotate_optab = rotr_optab;
2452 machine_mode op1_mode;
2453 scalar_mode scalar_mode = GET_MODE_INNER (mode);
2454 int attempt;
2455 bool speed = optimize_insn_for_speed_p ();
2457 op1 = amount;
2458 op1_mode = GET_MODE (op1);
2460 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2461 shift amount is a vector, use the vector/vector shift patterns. */
2462 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2464 lshift_optab = vashl_optab;
2465 rshift_arith_optab = vashr_optab;
2466 rshift_uns_optab = vlshr_optab;
2467 lrotate_optab = vrotl_optab;
2468 rrotate_optab = vrotr_optab;
2471 /* Previously detected shift-counts computed by NEGATE_EXPR
2472 and shifted in the other direction; but that does not work
2473 on all machines. */
2475 if (SHIFT_COUNT_TRUNCATED)
2477 if (CONST_INT_P (op1)
2478 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2479 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2480 op1 = gen_int_shift_amount (mode,
2481 (unsigned HOST_WIDE_INT) INTVAL (op1)
2482 % GET_MODE_BITSIZE (scalar_mode));
2483 else if (GET_CODE (op1) == SUBREG
2484 && subreg_lowpart_p (op1)
2485 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2486 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2487 op1 = SUBREG_REG (op1);
2490 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2491 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2492 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2493 amount instead. */
2494 if (rotate
2495 && CONST_INT_P (op1)
2496 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2497 GET_MODE_BITSIZE (scalar_mode) - 1))
2499 op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2500 - INTVAL (op1)));
2501 left = !left;
2502 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2505 /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2506 Note that this is not the case for bigger values. For instance a rotation
2507 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2508 0x04030201 (bswapsi). */
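/* E.g. rotating the 16-bit value 0x0102 by 8 in either direction gives
   0x0201, which is exactly the bswaphi result.  */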
2509 if (rotate
2510 && CONST_INT_P (op1)
2511 && INTVAL (op1) == BITS_PER_UNIT
2512 && GET_MODE_SIZE (scalar_mode) == 2
2513 && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2514 return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2516 if (op1 == const0_rtx)
2517 return shifted;
2519 /* Check whether it's cheaper to implement a left shift by a constant
2520 bit count as a sequence of additions. */
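/* E.g. x << 2 can be done as t = x + x; t = t + t, which is preferred
   below only when the two additions are cheaper than the shift itself.  */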
2521 if (code == LSHIFT_EXPR
2522 && CONST_INT_P (op1)
2523 && INTVAL (op1) > 0
2524 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2525 && INTVAL (op1) < MAX_BITS_PER_WORD
2526 && (shift_cost (speed, mode, INTVAL (op1))
2527 > INTVAL (op1) * add_cost (speed, mode))
2528 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2530 int i;
2531 for (i = 0; i < INTVAL (op1); i++)
2533 temp = force_reg (mode, shifted);
2534 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2535 unsignedp, OPTAB_LIB_WIDEN);
2537 return shifted;
2540 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2542 enum optab_methods methods;
2544 if (attempt == 0)
2545 methods = OPTAB_DIRECT;
2546 else if (attempt == 1)
2547 methods = OPTAB_WIDEN;
2548 else
2549 methods = OPTAB_LIB_WIDEN;
2551 if (rotate)
2553 /* Widening does not work for rotation. */
2554 if (methods == OPTAB_WIDEN)
2555 continue;
2556 else if (methods == OPTAB_LIB_WIDEN)
2558 /* If we have been unable to open-code this by a rotation,
2559 do it as the IOR of two shifts. I.e., to rotate A
2560 by N bits, compute
2561 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2562 where C is the bitsize of A.
2564 It is theoretically possible that the target machine might
2565 not be able to perform either shift and hence we would
2566 be making two libcalls rather than just the one for the
2567 shift (similarly if IOR could not be done). We will allow
2568 this extremely unlikely lossage to avoid complicating the
2569 code below. */
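/* As an illustration, for a 32-bit A rotated left by N this computes
   (A << N) | ((unsigned) A >> ((-N) & 31)); with N == 8 that is
   (A << 8) | (A >> 24), and the masking keeps the right-shift amount
   in range when N is 0.  */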
2571 rtx subtarget = target == shifted ? 0 : target;
2572 rtx new_amount, other_amount;
2573 rtx temp1;
2575 new_amount = op1;
2576 if (op1 == const0_rtx)
2577 return shifted;
2578 else if (CONST_INT_P (op1))
2579 other_amount = gen_int_shift_amount
2580 (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2581 else
2583 other_amount
2584 = simplify_gen_unary (NEG, GET_MODE (op1),
2585 op1, GET_MODE (op1));
2586 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2587 other_amount
2588 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2589 gen_int_mode (mask, GET_MODE (op1)));
2592 shifted = force_reg (mode, shifted);
2594 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2595 mode, shifted, new_amount, 0, 1);
2596 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2597 mode, shifted, other_amount,
2598 subtarget, 1);
2599 return expand_binop (mode, ior_optab, temp, temp1, target,
2600 unsignedp, methods);
2603 temp = expand_binop (mode,
2604 left ? lrotate_optab : rrotate_optab,
2605 shifted, op1, target, unsignedp, methods);
2607 else if (unsignedp)
2608 temp = expand_binop (mode,
2609 left ? lshift_optab : rshift_uns_optab,
2610 shifted, op1, target, unsignedp, methods);
2612 /* Do arithmetic shifts.
2613 Also, if we are going to widen the operand, we can just as well
2614 use an arithmetic right-shift instead of a logical one. */
2615 if (temp == 0 && ! rotate
2616 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2618 enum optab_methods methods1 = methods;
2620 /* If trying to widen a log shift to an arithmetic shift,
2621 don't accept an arithmetic shift of the same size. */
2622 if (unsignedp)
2623 methods1 = OPTAB_MUST_WIDEN;
2625 /* Arithmetic shift */
2627 temp = expand_binop (mode,
2628 left ? lshift_optab : rshift_arith_optab,
2629 shifted, op1, target, unsignedp, methods1);
2632 /* We used to try extzv here for logical right shifts, but that was
2633 only useful for one machine, the VAX, and caused poor code
2634 generation there for lshrdi3, so the code was deleted and a
2635 define_expand for lshrsi3 was added to vax.md. */
2638 gcc_assert (temp != NULL_RTX || may_fail);
2639 return temp;
2642 /* Output a shift instruction for expression code CODE,
2643 with SHIFTED being the rtx for the value to shift,
2644 and AMOUNT the amount to shift by.
2645 Store the result in the rtx TARGET, if that is convenient.
2646 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2647 Return the rtx for where the value is. */
2650 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2651 poly_int64 amount, rtx target, int unsignedp)
2653 return expand_shift_1 (code, mode, shifted,
2654 gen_int_shift_amount (mode, amount),
2655 target, unsignedp);
2658 /* Likewise, but return 0 if that cannot be done. */
2660 static rtx
2661 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2662 int amount, rtx target, int unsignedp)
2664 return expand_shift_1 (code, mode,
2665 shifted, GEN_INT (amount), target, unsignedp, true);
2668 /* Output a shift instruction for expression code CODE,
2669 with SHIFTED being the rtx for the value to shift,
2670 and AMOUNT the tree for the amount to shift by.
2671 Store the result in the rtx TARGET, if that is convenient.
2672 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2673 Return the rtx for where the value is. */
2676 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2677 tree amount, rtx target, int unsignedp)
2679 return expand_shift_1 (code, mode,
2680 shifted, expand_normal (amount), target, unsignedp);
2684 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2685 const struct mult_cost *, machine_mode mode);
2686 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2687 const struct algorithm *, enum mult_variant);
2688 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2689 static rtx extract_high_half (scalar_int_mode, rtx);
2690 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2691 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2692 int, int);
2693 /* Compute and return the best algorithm for multiplying by T.
2694 The algorithm must cost less than COST_LIMIT.
2695 If retval.cost >= COST_LIMIT, no algorithm was found and all
2696 other fields of the returned struct are undefined.
2697 MODE is the machine mode of the multiplication. */
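/* As a rough illustration, a multiplication by 10 can come out of this
   search as ((x << 2) + x) << 1, i.e. one shift-add followed by one
   shift instead of a full multiply.  */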
2699 static void
2700 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2701 const struct mult_cost *cost_limit, machine_mode mode)
2703 int m;
2704 struct algorithm *alg_in, *best_alg;
2705 struct mult_cost best_cost;
2706 struct mult_cost new_limit;
2707 int op_cost, op_latency;
2708 unsigned HOST_WIDE_INT orig_t = t;
2709 unsigned HOST_WIDE_INT q;
2710 int maxm, hash_index;
2711 bool cache_hit = false;
2712 enum alg_code cache_alg = alg_zero;
2713 bool speed = optimize_insn_for_speed_p ();
2714 scalar_int_mode imode;
2715 struct alg_hash_entry *entry_ptr;
2717 /* Indicate that no algorithm is yet found. If no algorithm
2718 is found, this value will be returned to indicate failure. */
2719 alg_out->cost.cost = cost_limit->cost + 1;
2720 alg_out->cost.latency = cost_limit->latency + 1;
2722 if (cost_limit->cost < 0
2723 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2724 return;
2726 /* Be prepared for vector modes. */
2727 imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2729 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2731 /* Restrict the bits of "t" to the multiplication's mode. */
2732 t &= GET_MODE_MASK (imode);
2734 /* t == 1 can be done in zero cost. */
2735 if (t == 1)
2737 alg_out->ops = 1;
2738 alg_out->cost.cost = 0;
2739 alg_out->cost.latency = 0;
2740 alg_out->op[0] = alg_m;
2741 return;
2744 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2745 fail now. */
2746 if (t == 0)
2748 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2749 return;
2750 else
2752 alg_out->ops = 1;
2753 alg_out->cost.cost = zero_cost (speed);
2754 alg_out->cost.latency = zero_cost (speed);
2755 alg_out->op[0] = alg_zero;
2756 return;
2760 /* We'll be needing a couple extra algorithm structures now. */
2762 alg_in = XALLOCA (struct algorithm);
2763 best_alg = XALLOCA (struct algorithm);
2764 best_cost = *cost_limit;
2766 /* Compute the hash index. */
2767 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2769 /* See if we already know what to do for T. */
2770 entry_ptr = alg_hash_entry_ptr (hash_index);
2771 if (entry_ptr->t == t
2772 && entry_ptr->mode == mode
2773 && entry_ptr->speed == speed
2774 && entry_ptr->alg != alg_unknown)
2776 cache_alg = entry_ptr->alg;
2778 if (cache_alg == alg_impossible)
2780 /* The cache tells us that it's impossible to synthesize
2781 multiplication by T within entry_ptr->cost. */
2782 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2783 /* COST_LIMIT is at least as restrictive as the one
2784 recorded in the hash table, in which case we have no
2785 hope of synthesizing a multiplication. Just
2786 return. */
2787 return;
2789 /* If we get here, COST_LIMIT is less restrictive than the
2790 one recorded in the hash table, so we may be able to
2791 synthesize a multiplication. Proceed as if we didn't
2792 have the cache entry. */
2794 else
2796 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2797 /* The cached algorithm shows that this multiplication
2798 requires more cost than COST_LIMIT. Just return. This
2799 way, we don't clobber this cache entry with
2800 alg_impossible but retain useful information. */
2801 return;
2803 cache_hit = true;
2805 switch (cache_alg)
2807 case alg_shift:
2808 goto do_alg_shift;
2810 case alg_add_t_m2:
2811 case alg_sub_t_m2:
2812 goto do_alg_addsub_t_m2;
2814 case alg_add_factor:
2815 case alg_sub_factor:
2816 goto do_alg_addsub_factor;
2818 case alg_add_t2_m:
2819 goto do_alg_add_t2_m;
2821 case alg_sub_t2_m:
2822 goto do_alg_sub_t2_m;
2824 default:
2825 gcc_unreachable ();
2830 /* If we have a group of zero bits at the low-order part of T, try
2831 multiplying by the remaining bits and then doing a shift. */
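/* E.g. T == 40 (binary 101000) has m == 3 low zero bits, so we try to
   synthesize a multiplication by q == 5 and shift the result left by 3.  */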
2833 if ((t & 1) == 0)
2835 do_alg_shift:
2836 m = ctz_or_zero (t); /* m = number of low zero bits */
2837 if (m < maxm)
2839 q = t >> m;
2840 /* The function expand_shift will choose between a shift and
2841 a sequence of additions, so the observed cost is given as
2842 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2843 op_cost = m * add_cost (speed, mode);
2844 if (shift_cost (speed, mode, m) < op_cost)
2845 op_cost = shift_cost (speed, mode, m);
2846 new_limit.cost = best_cost.cost - op_cost;
2847 new_limit.latency = best_cost.latency - op_cost;
2848 synth_mult (alg_in, q, &new_limit, mode);
2850 alg_in->cost.cost += op_cost;
2851 alg_in->cost.latency += op_cost;
2852 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2854 best_cost = alg_in->cost;
2855 std::swap (alg_in, best_alg);
2856 best_alg->log[best_alg->ops] = m;
2857 best_alg->op[best_alg->ops] = alg_shift;
2860 /* See if treating ORIG_T as a signed number yields a better
2861 sequence. Try this sequence only for a negative ORIG_T
2862 as it would be useless for a non-negative ORIG_T. */
2863 if ((HOST_WIDE_INT) orig_t < 0)
2865 /* Shift ORIG_T as follows because a right shift of a
2866 negative-valued signed type is implementation
2867 defined. */
2868 q = ~(~orig_t >> m);
2869 /* The function expand_shift will choose between a shift
2870 and a sequence of additions, so the observed cost is
2871 given as MIN (m * add_cost(speed, mode),
2872 shift_cost(speed, mode, m)). */
2873 op_cost = m * add_cost (speed, mode);
2874 if (shift_cost (speed, mode, m) < op_cost)
2875 op_cost = shift_cost (speed, mode, m);
2876 new_limit.cost = best_cost.cost - op_cost;
2877 new_limit.latency = best_cost.latency - op_cost;
2878 synth_mult (alg_in, q, &new_limit, mode);
2880 alg_in->cost.cost += op_cost;
2881 alg_in->cost.latency += op_cost;
2882 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2884 best_cost = alg_in->cost;
2885 std::swap (alg_in, best_alg);
2886 best_alg->log[best_alg->ops] = m;
2887 best_alg->op[best_alg->ops] = alg_shift;
2891 if (cache_hit)
2892 goto done;
2895 /* If we have an odd number, add or subtract one. */
2896 if ((t & 1) != 0)
2898 unsigned HOST_WIDE_INT w;
2900 do_alg_addsub_t_m2:
2901 for (w = 1; (w & t) != 0; w <<= 1)
2903 /* If T was -1, then W will be zero after the loop. This is another
2904 case where T ends with ...111. Handling this with (T + 1) and
2905 subtracting 1 produces slightly better code and results in algorithm
2906 selection much faster than treating it like the ...0111 case
2907 below. */
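/* E.g. T == 15 (binary 1111) is handled as x*16 - x, i.e. (x << 4) - x.  */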
2908 if (w == 0
2909 || (w > 2
2910 /* Reject the case where t is 3.
2911 Thus we prefer addition in that case. */
2912 && t != 3))
2914 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2916 op_cost = add_cost (speed, mode);
2917 new_limit.cost = best_cost.cost - op_cost;
2918 new_limit.latency = best_cost.latency - op_cost;
2919 synth_mult (alg_in, t + 1, &new_limit, mode);
2921 alg_in->cost.cost += op_cost;
2922 alg_in->cost.latency += op_cost;
2923 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2925 best_cost = alg_in->cost;
2926 std::swap (alg_in, best_alg);
2927 best_alg->log[best_alg->ops] = 0;
2928 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2931 else
2933 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2935 op_cost = add_cost (speed, mode);
2936 new_limit.cost = best_cost.cost - op_cost;
2937 new_limit.latency = best_cost.latency - op_cost;
2938 synth_mult (alg_in, t - 1, &new_limit, mode);
2940 alg_in->cost.cost += op_cost;
2941 alg_in->cost.latency += op_cost;
2942 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2944 best_cost = alg_in->cost;
2945 std::swap (alg_in, best_alg);
2946 best_alg->log[best_alg->ops] = 0;
2947 best_alg->op[best_alg->ops] = alg_add_t_m2;
2951 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2952 quickly with a - a * n for some appropriate constant n. */
2953 m = exact_log2 (-orig_t + 1);
2954 if (m >= 0 && m < maxm)
2956 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2957 /* If the target has a cheap shift-and-subtract insn, use
2958 that in preference to a shift insn followed by a sub insn.
2959 Assume that the shift-and-sub is "atomic" with a latency
2960 equal to its cost, otherwise assume that on superscalar
2961 hardware the shift may be executed concurrently with the
2962 earlier steps in the algorithm. */
2963 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2965 op_cost = shiftsub1_cost (speed, mode, m);
2966 op_latency = op_cost;
2968 else
2969 op_latency = add_cost (speed, mode);
2971 new_limit.cost = best_cost.cost - op_cost;
2972 new_limit.latency = best_cost.latency - op_latency;
2973 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2974 &new_limit, mode);
2976 alg_in->cost.cost += op_cost;
2977 alg_in->cost.latency += op_latency;
2978 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2980 best_cost = alg_in->cost;
2981 std::swap (alg_in, best_alg);
2982 best_alg->log[best_alg->ops] = m;
2983 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2987 if (cache_hit)
2988 goto done;
2991 /* Look for factors of t of the form
2992 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2993 If we find such a factor, we can multiply by t using an algorithm that
2994 multiplies by q, shift the result by m and add/subtract it to itself.
2996 We search for large factors first and loop down, even if large factors
2997 are less probable than small; if we find a large factor we will find a
2998 good sequence quickly, and therefore be able to prune (by decreasing
2999 COST_LIMIT) the search. */
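/* E.g. T == 45 factors as 5 * (2**3 + 1): synthesize q = x*5 first and
   then form (q << 3) + q.  */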
3001 do_alg_addsub_factor:
3002 for (m = floor_log2 (t - 1); m >= 2; m--)
3004 unsigned HOST_WIDE_INT d;
3006 d = (HOST_WIDE_INT_1U << m) + 1;
3007 if (t % d == 0 && t > d && m < maxm
3008 && (!cache_hit || cache_alg == alg_add_factor))
3010 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3011 if (shiftadd_cost (speed, mode, m) <= op_cost)
3012 op_cost = shiftadd_cost (speed, mode, m);
3014 op_latency = op_cost;
3017 new_limit.cost = best_cost.cost - op_cost;
3018 new_limit.latency = best_cost.latency - op_latency;
3019 synth_mult (alg_in, t / d, &new_limit, mode);
3021 alg_in->cost.cost += op_cost;
3022 alg_in->cost.latency += op_latency;
3023 if (alg_in->cost.latency < op_cost)
3024 alg_in->cost.latency = op_cost;
3025 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3027 best_cost = alg_in->cost;
3028 std::swap (alg_in, best_alg);
3029 best_alg->log[best_alg->ops] = m;
3030 best_alg->op[best_alg->ops] = alg_add_factor;
3032 /* Other factors will have been taken care of in the recursion. */
3033 break;
3036 d = (HOST_WIDE_INT_1U << m) - 1;
3037 if (t % d == 0 && t > d && m < maxm
3038 && (!cache_hit || cache_alg == alg_sub_factor))
3040 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3041 if (shiftsub0_cost (speed, mode, m) <= op_cost)
3042 op_cost = shiftsub0_cost (speed, mode, m);
3044 op_latency = op_cost;
3046 new_limit.cost = best_cost.cost - op_cost;
3047 new_limit.latency = best_cost.latency - op_latency;
3048 synth_mult (alg_in, t / d, &new_limit, mode);
3050 alg_in->cost.cost += op_cost;
3051 alg_in->cost.latency += op_latency;
3052 if (alg_in->cost.latency < op_cost)
3053 alg_in->cost.latency = op_cost;
3054 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3056 best_cost = alg_in->cost;
3057 std::swap (alg_in, best_alg);
3058 best_alg->log[best_alg->ops] = m;
3059 best_alg->op[best_alg->ops] = alg_sub_factor;
3061 break;
3064 if (cache_hit)
3065 goto done;
3067 /* Try shift-and-add (load effective address) instructions,
3068 i.e. do a*3, a*5, a*9. */
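/* E.g. for T == 9 we get q == 8, m == 3, so x*9 becomes the single
   shift-add (x << 3) + x.  */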
3069 if ((t & 1) != 0)
3071 do_alg_add_t2_m:
3072 q = t - 1;
3073 m = ctz_hwi (q);
3074 if (q && m < maxm)
3076 op_cost = shiftadd_cost (speed, mode, m);
3077 new_limit.cost = best_cost.cost - op_cost;
3078 new_limit.latency = best_cost.latency - op_cost;
3079 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3081 alg_in->cost.cost += op_cost;
3082 alg_in->cost.latency += op_cost;
3083 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3085 best_cost = alg_in->cost;
3086 std::swap (alg_in, best_alg);
3087 best_alg->log[best_alg->ops] = m;
3088 best_alg->op[best_alg->ops] = alg_add_t2_m;
3091 if (cache_hit)
3092 goto done;
3094 do_alg_sub_t2_m:
3095 q = t + 1;
3096 m = ctz_hwi (q);
3097 if (q && m < maxm)
3099 op_cost = shiftsub0_cost (speed, mode, m);
3100 new_limit.cost = best_cost.cost - op_cost;
3101 new_limit.latency = best_cost.latency - op_cost;
3102 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3104 alg_in->cost.cost += op_cost;
3105 alg_in->cost.latency += op_cost;
3106 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3108 best_cost = alg_in->cost;
3109 std::swap (alg_in, best_alg);
3110 best_alg->log[best_alg->ops] = m;
3111 best_alg->op[best_alg->ops] = alg_sub_t2_m;
3114 if (cache_hit)
3115 goto done;
3118 done:
3119 /* If best_cost has not decreased, we have not found any algorithm. */
3120 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3122 /* We failed to find an algorithm. Record alg_impossible for
3123 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3124 we are asked to find an algorithm for T within the same or
3125 lower COST_LIMIT, we can immediately return to the
3126 caller. */
3127 entry_ptr->t = t;
3128 entry_ptr->mode = mode;
3129 entry_ptr->speed = speed;
3130 entry_ptr->alg = alg_impossible;
3131 entry_ptr->cost = *cost_limit;
3132 return;
3135 /* Cache the result. */
3136 if (!cache_hit)
3138 entry_ptr->t = t;
3139 entry_ptr->mode = mode;
3140 entry_ptr->speed = speed;
3141 entry_ptr->alg = best_alg->op[best_alg->ops];
3142 entry_ptr->cost.cost = best_cost.cost;
3143 entry_ptr->cost.latency = best_cost.latency;
3146 /* If we are getting a sequence too long for `struct algorithm'
3147 to record, make this search fail. */
3148 if (best_alg->ops == MAX_BITS_PER_WORD)
3149 return;
3151 /* Copy the algorithm from temporary space to the space at alg_out.
3152 We avoid using structure assignment because the majority of
3153 best_alg is normally undefined, and this is a critical function. */
3154 alg_out->ops = best_alg->ops + 1;
3155 alg_out->cost = best_cost;
3156 memcpy (alg_out->op, best_alg->op,
3157 alg_out->ops * sizeof *alg_out->op);
3158 memcpy (alg_out->log, best_alg->log,
3159 alg_out->ops * sizeof *alg_out->log);
3162 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3163 Try three variations:
3165 - a shift/add sequence based on VAL itself
3166 - a shift/add sequence based on -VAL, followed by a negation
3167 - a shift/add sequence based on VAL - 1, followed by an addition.
3169 Return true if the cheapest of these costs less than MULT_COST,
3170 describing the algorithm in *ALG and final fixup in *VARIANT. */
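/* For instance, the basic variant handles VAL == 5 as (x << 2) + x, the
   negate variant can handle VAL == -7 as -(x*8 - x), and the VAL - 1
   variant sometimes wins for the large odd multipliers that show up in
   division-by-constant expansions.  */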
3172 bool
3173 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3174 struct algorithm *alg, enum mult_variant *variant,
3175 int mult_cost)
3177 struct algorithm alg2;
3178 struct mult_cost limit;
3179 int op_cost;
3180 bool speed = optimize_insn_for_speed_p ();
3182 /* Fail quickly for impossible bounds. */
3183 if (mult_cost < 0)
3184 return false;
3186 /* Ensure that mult_cost provides a reasonable upper bound.
3187 Any constant multiplication can be performed with less
3188 than 2 * bits additions. */
3189 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3190 if (mult_cost > op_cost)
3191 mult_cost = op_cost;
3193 *variant = basic_variant;
3194 limit.cost = mult_cost;
3195 limit.latency = mult_cost;
3196 synth_mult (alg, val, &limit, mode);
3198 /* This works only if the inverted value actually fits in an
3199 `unsigned int'. */
3200 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3202 op_cost = neg_cost (speed, mode);
3203 if (MULT_COST_LESS (&alg->cost, mult_cost))
3205 limit.cost = alg->cost.cost - op_cost;
3206 limit.latency = alg->cost.latency - op_cost;
3208 else
3210 limit.cost = mult_cost - op_cost;
3211 limit.latency = mult_cost - op_cost;
3214 synth_mult (&alg2, -val, &limit, mode);
3215 alg2.cost.cost += op_cost;
3216 alg2.cost.latency += op_cost;
3217 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3218 *alg = alg2, *variant = negate_variant;
3221 /* This proves very useful for division-by-constant. */
3222 op_cost = add_cost (speed, mode);
3223 if (MULT_COST_LESS (&alg->cost, mult_cost))
3225 limit.cost = alg->cost.cost - op_cost;
3226 limit.latency = alg->cost.latency - op_cost;
3228 else
3230 limit.cost = mult_cost - op_cost;
3231 limit.latency = mult_cost - op_cost;
3234 synth_mult (&alg2, val - 1, &limit, mode);
3235 alg2.cost.cost += op_cost;
3236 alg2.cost.latency += op_cost;
3237 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3238 *alg = alg2, *variant = add_variant;
3240 return MULT_COST_LESS (&alg->cost, mult_cost);
3243 /* A subroutine of expand_mult, used for constant multiplications.
3244 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3245 convenient. Use the shift/add sequence described by ALG and apply
3246 the final fixup specified by VARIANT. */
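/* As an illustrative trace, VAL == 10 with the algorithm
   { alg_m, alg_add_t2_m (log 2), alg_shift (log 1) } leaves ACCUM at
   x, then (x << 2) + x, then ((x << 2) + x) << 1, while VAL_SO_FAR
   goes 1, 5, 10.  */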
3248 static rtx
3249 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3250 rtx target, const struct algorithm *alg,
3251 enum mult_variant variant)
3253 unsigned HOST_WIDE_INT val_so_far;
3254 rtx_insn *insn;
3255 rtx accum, tem;
3256 int opno;
3257 machine_mode nmode;
3259 /* Avoid referencing memory over and over, and avoid invalid sharing
3260 on SUBREGs. */
3261 op0 = force_reg (mode, op0);
3263 /* ACCUM starts out either as OP0 or as a zero, depending on
3264 the first operation. */
3266 if (alg->op[0] == alg_zero)
3268 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3269 val_so_far = 0;
3271 else if (alg->op[0] == alg_m)
3273 accum = copy_to_mode_reg (mode, op0);
3274 val_so_far = 1;
3276 else
3277 gcc_unreachable ();
3279 for (opno = 1; opno < alg->ops; opno++)
3281 int log = alg->log[opno];
3282 rtx shift_subtarget = optimize ? 0 : accum;
3283 rtx add_target
3284 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3285 && !optimize)
3286 ? target : 0;
3287 rtx accum_target = optimize ? 0 : accum;
3288 rtx accum_inner;
3290 switch (alg->op[opno])
3292 case alg_shift:
3293 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3294 /* REG_EQUAL note will be attached to the following insn. */
3295 emit_move_insn (accum, tem);
3296 val_so_far <<= log;
3297 break;
3299 case alg_add_t_m2:
3300 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3301 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3302 add_target ? add_target : accum_target);
3303 val_so_far += HOST_WIDE_INT_1U << log;
3304 break;
3306 case alg_sub_t_m2:
3307 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3308 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3309 add_target ? add_target : accum_target);
3310 val_so_far -= HOST_WIDE_INT_1U << log;
3311 break;
3313 case alg_add_t2_m:
3314 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3315 log, shift_subtarget, 0);
3316 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3317 add_target ? add_target : accum_target);
3318 val_so_far = (val_so_far << log) + 1;
3319 break;
3321 case alg_sub_t2_m:
3322 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3323 log, shift_subtarget, 0);
3324 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3325 add_target ? add_target : accum_target);
3326 val_so_far = (val_so_far << log) - 1;
3327 break;
3329 case alg_add_factor:
3330 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3331 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3332 add_target ? add_target : accum_target);
3333 val_so_far += val_so_far << log;
3334 break;
3336 case alg_sub_factor:
3337 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3338 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3339 (add_target
3340 ? add_target : (optimize ? 0 : tem)));
3341 val_so_far = (val_so_far << log) - val_so_far;
3342 break;
3344 default:
3345 gcc_unreachable ();
3348 if (SCALAR_INT_MODE_P (mode))
3350 /* Write a REG_EQUAL note on the last insn so that we can cse
3351 multiplication sequences. Note that if ACCUM is a SUBREG,
3352 we've set the inner register and must properly indicate that. */
3353 tem = op0, nmode = mode;
3354 accum_inner = accum;
3355 if (GET_CODE (accum) == SUBREG)
3357 accum_inner = SUBREG_REG (accum);
3358 nmode = GET_MODE (accum_inner);
3359 tem = gen_lowpart (nmode, op0);
3362 /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3363 In that case, only the low bits of accum would be guaranteed to
3364 be equal to the content of the REG_EQUAL note, the upper bits
3365 can be anything. */
3366 if (!paradoxical_subreg_p (tem))
3368 insn = get_last_insn ();
3369 wide_int wval_so_far
3370 = wi::uhwi (val_so_far,
3371 GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3372 rtx c = immed_wide_int_const (wval_so_far, nmode);
3373 set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3374 accum_inner);
3379 if (variant == negate_variant)
3381 val_so_far = -val_so_far;
3382 accum = expand_unop (mode, neg_optab, accum, target, 0);
3384 else if (variant == add_variant)
3386 val_so_far = val_so_far + 1;
3387 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3390 /* Compare only the bits of val and val_so_far that are significant
3391 in the result mode, to avoid sign-/zero-extension confusion. */
3392 nmode = GET_MODE_INNER (mode);
3393 val &= GET_MODE_MASK (nmode);
3394 val_so_far &= GET_MODE_MASK (nmode);
3395 gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3397 return accum;
3400 /* Perform a multiplication and return an rtx for the result.
3401 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3402 TARGET is a suggestion for where to store the result (an rtx).
3404 We check specially for a constant integer as OP1.
3405 If you want this check for OP0 as well, then before calling
3406 you should swap the two operands if OP0 would be constant. */
3409 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3410 int unsignedp, bool no_libcall)
3412 enum mult_variant variant;
3413 struct algorithm algorithm;
3414 rtx scalar_op1;
3415 int max_cost;
3416 bool speed = optimize_insn_for_speed_p ();
3417 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3419 if (CONSTANT_P (op0))
3420 std::swap (op0, op1);
3422 /* For vectors, there are several simplifications that can be made if
3423 all elements of the vector constant are identical. */
3424 scalar_op1 = unwrap_const_vec_duplicate (op1);
3426 if (INTEGRAL_MODE_P (mode))
3428 rtx fake_reg;
3429 HOST_WIDE_INT coeff;
3430 bool is_neg;
3431 int mode_bitsize;
3433 if (op1 == CONST0_RTX (mode))
3434 return op1;
3435 if (op1 == CONST1_RTX (mode))
3436 return op0;
3437 if (op1 == CONSTM1_RTX (mode))
3438 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3439 op0, target, 0);
3441 if (do_trapv)
3442 goto skip_synth;
3444 /* If mode is an integer vector mode, check whether the backend supports
3445 vector lshift (by scalar or vector) at all. If not, we can't use
3446 a synthesized multiply. */
3447 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3448 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3449 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3450 goto skip_synth;
3452 /* These are the operations that are potentially turned into
3453 a sequence of shifts and additions. */
3454 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3456 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3457 less than or equal in size to `unsigned int' this doesn't matter.
3458 If the mode is larger than `unsigned int', then synth_mult works
3459 only if the constant value exactly fits in an `unsigned int' without
3460 any truncation. This means that multiplying by negative values does
3461 not work; results are off by 2^32 on a 32-bit machine. */
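/* Editorial illustration keyed to the HOST_BITS_PER_WIDE_INT checks below
   (an assumption, not from the original sources): with a 64-bit
   HOST_WIDE_INT, a TImode multiply by -3 would need the coefficient
   2^128 - 3, which does not fit in the unsigned HOST_WIDE_INT that
   synth_mult works with.  The is_neg path further down therefore
   synthesizes the multiply by 3 and negates the result instead.  */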
3462 if (CONST_INT_P (scalar_op1))
3464 coeff = INTVAL (scalar_op1);
3465 is_neg = coeff < 0;
3467 #if TARGET_SUPPORTS_WIDE_INT
3468 else if (CONST_WIDE_INT_P (scalar_op1))
3469 #else
3470 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3471 #endif
3473 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3474 /* Perfect power of 2 (other than 1, which is handled above). */
3475 if (shift > 0)
3476 return expand_shift (LSHIFT_EXPR, mode, op0,
3477 shift, target, unsignedp);
3478 else
3479 goto skip_synth;
3481 else
3482 goto skip_synth;
3484 /* We used to test optimize here, on the grounds that it's better to
3485 produce a smaller program when -O is not used. But this causes
3486 such a terrible slowdown sometimes that it seems better to always
3487 use synth_mult. */
3489 /* Special case powers of two. */
3490 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3491 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3492 return expand_shift (LSHIFT_EXPR, mode, op0,
3493 floor_log2 (coeff), target, unsignedp);
3495 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3497 /* Attempt to handle multiplication of DImode values by negative
3498 coefficients, by performing the multiplication by a positive
3499 multiplier and then inverting the result. */
3500 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3502 /* It's safe to use -coeff even for INT_MIN, as the
3503 result is interpreted as an unsigned coefficient.
3504 Exclude cost of op0 from max_cost to match the cost
3505 calculation of the synth_mult. */
3506 coeff = -(unsigned HOST_WIDE_INT) coeff;
3507 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3508 mode, speed)
3509 - neg_cost (speed, mode));
3510 if (max_cost <= 0)
3511 goto skip_synth;
3513 /* Special case powers of two. */
3514 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3516 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3517 floor_log2 (coeff), target, unsignedp);
3518 return expand_unop (mode, neg_optab, temp, target, 0);
3521 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3522 max_cost))
3524 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3525 &algorithm, variant);
3526 return expand_unop (mode, neg_optab, temp, target, 0);
3528 goto skip_synth;
3531 /* Exclude cost of op0 from max_cost to match the cost
3532 calculation of the synth_mult. */
3533 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3534 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3535 return expand_mult_const (mode, op0, coeff, target,
3536 &algorithm, variant);
3538 skip_synth:
3540 /* Expand x*2.0 as x+x. */
3541 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3542 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3544 op0 = force_reg (GET_MODE (op0), op0);
3545 return expand_binop (mode, add_optab, op0, op0,
3546 target, unsignedp,
3547 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3550 /* This used to use umul_optab if unsigned, but for non-widening multiply
3551 there is no difference between signed and unsigned. */
3552 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3553 op0, op1, target, unsignedp,
3554 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3555 gcc_assert (op0 || no_libcall);
3556 return op0;
3559 /* Return a cost estimate for multiplying a register by the given
3560 COEFFicient in the given MODE and SPEED. */
3563 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3565 int max_cost;
3566 struct algorithm algorithm;
3567 enum mult_variant variant;
3569 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3570 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3571 mode, speed);
3572 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3573 return algorithm.cost.cost;
3574 else
3575 return max_cost;
3578 /* Perform a widening multiplication and return an rtx for the result.
3579 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3580 TARGET is a suggestion for where to store the result (an rtx).
3581 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3582 or smul_widen_optab.
3584 We check specially for a constant integer as OP1, comparing the
3585 cost of a widening multiply against the cost of a sequence of shifts
3586 and adds. */
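/* Editorial illustration (not from the original sources): a widening
   HImode -> SImode multiply by the constant 5 may be expanded by first
   extending OP0 to SImode and then computing (x << 2) + x, whenever
   choose_mult_variant finds such a shift/add sequence cheaper than the
   mul_widen_cost bound used below.  */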
3589 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3590 int unsignedp, optab this_optab)
3592 bool speed = optimize_insn_for_speed_p ();
3593 rtx cop1;
3595 if (CONST_INT_P (op1)
3596 && GET_MODE (op0) != VOIDmode
3597 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3598 this_optab == umul_widen_optab))
3599 && CONST_INT_P (cop1)
3600 && (INTVAL (cop1) >= 0
3601 || HWI_COMPUTABLE_MODE_P (mode)))
3603 HOST_WIDE_INT coeff = INTVAL (cop1);
3604 int max_cost;
3605 enum mult_variant variant;
3606 struct algorithm algorithm;
3608 if (coeff == 0)
3609 return CONST0_RTX (mode);
3611 /* Special case powers of two. */
3612 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3614 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3615 return expand_shift (LSHIFT_EXPR, mode, op0,
3616 floor_log2 (coeff), target, unsignedp);
3619 /* Exclude cost of op0 from max_cost to match the cost
3620 calculation of the synth_mult. */
3621 max_cost = mul_widen_cost (speed, mode);
3622 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3623 max_cost))
3625 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3626 return expand_mult_const (mode, op0, coeff, target,
3627 &algorithm, variant);
3630 return expand_binop (mode, this_optab, op0, op1, target,
3631 unsignedp, OPTAB_LIB_WIDEN);
3634 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3635 replace division by D, and put the least significant N bits of the result
3636 in *MULTIPLIER_PTR and return the most significant bit.
3638 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3639 needed precision is in PRECISION (should be <= N).
3641 PRECISION should be as small as possible so this function can choose
3642 the multiplier more freely.
3644 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
3645 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3647 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3648 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
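/* Worked example (editorial illustration, not from the original sources):
   for D = 3 with N = PRECISION = 32 this yields *MULTIPLIER_PTR =
   0xAAAAAAAB, *POST_SHIFT_PTR = 1, *LGUP_PTR = 2 and a return value of 0,
   so for a 32-bit unsigned x

     x / 3 == (unsigned) (((unsigned long long) x * 0xAAAAAAAB) >> 33)

   i.e. a highpart multiply followed by one further right shift.  */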
3650 unsigned HOST_WIDE_INT
3651 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3652 unsigned HOST_WIDE_INT *multiplier_ptr,
3653 int *post_shift_ptr, int *lgup_ptr)
3655 int lgup, post_shift;
3656 int pow, pow2;
3658 /* lgup = ceil(log2(divisor)); */
3659 lgup = ceil_log2 (d);
3661 gcc_assert (lgup <= n);
3663 pow = n + lgup;
3664 pow2 = n + lgup - precision;
3666 /* mlow = 2^(N + lgup)/d */
3667 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3668 wide_int mlow = wi::udiv_trunc (val, d);
3670 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3671 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3672 wide_int mhigh = wi::udiv_trunc (val, d);
3674 /* If precision == N, then mlow, mhigh exceed 2^N
3675 (but they do not exceed 2^(N+1)). */
3677 /* Reduce to lowest terms. */
3678 for (post_shift = lgup; post_shift > 0; post_shift--)
3680 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3681 HOST_BITS_PER_WIDE_INT);
3682 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3683 HOST_BITS_PER_WIDE_INT);
3684 if (ml_lo >= mh_lo)
3685 break;
3687 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3688 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3691 *post_shift_ptr = post_shift;
3692 *lgup_ptr = lgup;
3693 if (n < HOST_BITS_PER_WIDE_INT)
3695 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3696 *multiplier_ptr = mhigh.to_uhwi () & mask;
3697 return mhigh.to_uhwi () > mask;
3699 else
3701 *multiplier_ptr = mhigh.to_uhwi ();
3702 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3706 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3707 congruent to 1 (mod 2**N). */
3709 static unsigned HOST_WIDE_INT
3710 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3712 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3714 /* The algorithm notes that the choice y = x satisfies
3715 x*y == 1 mod 2^3, since x is assumed odd.
3716 Each iteration doubles the number of bits of significance in y. */
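/* Worked example (editorial illustration, not from the original sources):
   invert_mod2n (3, 8) starts from y = 3 (3*3 == 9 == 1 mod 2^3), then
   y = 3*(2 - 9) mod 256 == 235 (an inverse mod 2^6), then
   y = 235*(2 - 3*235) mod 256 == 171, and indeed 3*171 == 513 == 1
   (mod 2^8).  */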
3718 unsigned HOST_WIDE_INT mask;
3719 unsigned HOST_WIDE_INT y = x;
3720 int nbit = 3;
3722 mask = (n == HOST_BITS_PER_WIDE_INT
3723 ? HOST_WIDE_INT_M1U
3724 : (HOST_WIDE_INT_1U << n) - 1);
3726 while (nbit < n)
3728 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3729 nbit *= 2;
3731 return y;
3734 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
3735 in the wrong signedness flavor. ADJ_OPERAND is already the high half of the
3736 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3737 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3738 become signed.
3740 The result is put in TARGET if that is convenient.
3742 MODE is the mode of operation. */
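/* Sketch of the identity being used (editorial note, not from the
   original sources): for N-bit two's complement inputs, the unsigned and
   signed high halves of the same 2N-bit product satisfy, modulo 2^N,

     uhigh == shigh + (OP0 < 0 ? OP1 : 0) + (OP1 < 0 ? OP0 : 0)

   The two shift-and-AND pairs below materialize the conditional terms,
   and ADJ_CODE selects PLUS (signed -> unsigned) or MINUS
   (unsigned -> signed).  */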
3745 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3746 rtx op1, rtx target, int unsignedp)
3748 rtx tem;
3749 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3751 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3752 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3753 tem = expand_and (mode, tem, op1, NULL_RTX);
3754 adj_operand
3755 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3756 adj_operand);
3758 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3759 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3760 tem = expand_and (mode, tem, op0, NULL_RTX);
3761 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3762 target);
3764 return target;
3767 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3769 static rtx
3770 extract_high_half (scalar_int_mode mode, rtx op)
3772 if (mode == word_mode)
3773 return gen_highpart (mode, op);
3775 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3777 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3778 GET_MODE_BITSIZE (mode), 0, 1);
3779 return convert_modes (mode, wider_mode, op, 0);
3782 /* Like expmed_mult_highpart, but only consider using a multiplication
3783 optab. OP1 is an rtx for the constant operand. */
3785 static rtx
3786 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3787 rtx target, int unsignedp, int max_cost)
3789 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3790 optab moptab;
3791 rtx tem;
3792 int size;
3793 bool speed = optimize_insn_for_speed_p ();
3795 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3797 size = GET_MODE_BITSIZE (mode);
3799 /* Firstly, try using a multiplication insn that only generates the needed
3800 high part of the product, and in the sign flavor of unsignedp. */
3801 if (mul_highpart_cost (speed, mode) < max_cost)
3803 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3804 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3805 unsignedp, OPTAB_DIRECT);
3806 if (tem)
3807 return tem;
3810 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3811 Need to adjust the result after the multiplication. */
3812 if (size - 1 < BITS_PER_WORD
3813 && (mul_highpart_cost (speed, mode)
3814 + 2 * shift_cost (speed, mode, size-1)
3815 + 4 * add_cost (speed, mode) < max_cost))
3817 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3818 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3819 unsignedp, OPTAB_DIRECT);
3820 if (tem)
3821 /* We used the wrong signedness. Adjust the result. */
3822 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3823 tem, unsignedp);
3826 /* Try widening multiplication. */
3827 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3828 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3829 && mul_widen_cost (speed, wider_mode) < max_cost)
3831 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3832 unsignedp, OPTAB_WIDEN);
3833 if (tem)
3834 return extract_high_half (mode, tem);
3837 /* Try widening the mode and perform a non-widening multiplication. */
3838 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3839 && size - 1 < BITS_PER_WORD
3840 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3841 < max_cost))
3843 rtx_insn *insns;
3844 rtx wop0, wop1;
3846 /* We need to widen the operands, for example to ensure the
3847 constant multiplier is correctly sign or zero extended.
3848 Use a sequence to clean up any instructions emitted by
3849 the conversions if things don't work out. */
3850 start_sequence ();
3851 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3852 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3853 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3854 unsignedp, OPTAB_WIDEN);
3855 insns = get_insns ();
3856 end_sequence ();
3858 if (tem)
3860 emit_insn (insns);
3861 return extract_high_half (mode, tem);
3865 /* Try widening multiplication of opposite signedness, and adjust. */
3866 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3867 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3868 && size - 1 < BITS_PER_WORD
3869 && (mul_widen_cost (speed, wider_mode)
3870 + 2 * shift_cost (speed, mode, size-1)
3871 + 4 * add_cost (speed, mode) < max_cost))
3873 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3874 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3875 if (tem != 0)
3877 tem = extract_high_half (mode, tem);
3878 /* We used the wrong signedness. Adjust the result. */
3879 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3880 target, unsignedp);
3884 return 0;
3887 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3888 putting the high half of the result in TARGET if that is convenient,
3889 and return where the result is. If the operation cannot be performed,
3890 0 is returned.
3892 MODE is the mode of operation and result.
3894 UNSIGNEDP nonzero means unsigned multiply.
3896 MAX_COST is the total allowed cost for the expanded RTL. */
3898 static rtx
3899 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3900 rtx target, int unsignedp, int max_cost)
3902 unsigned HOST_WIDE_INT cnst1;
3903 int extra_cost;
3904 bool sign_adjust = false;
3905 enum mult_variant variant;
3906 struct algorithm alg;
3907 rtx tem;
3908 bool speed = optimize_insn_for_speed_p ();
3910 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3911 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3913 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3915 /* We can't optimize modes wider than BITS_PER_WORD.
3916 ??? We might be able to perform double-word arithmetic if
3917 mode == word_mode; however, all the cost calculations in
3918 synth_mult etc. assume single-word operations. */
3919 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3920 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3921 return expmed_mult_highpart_optab (mode, op0, op1, target,
3922 unsignedp, max_cost);
3924 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3926 /* Check whether we try to multiply by a negative constant. */
3927 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3929 sign_adjust = true;
3930 extra_cost += add_cost (speed, mode);
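/* Editorial note (not from the original sources): read as an unsigned
   number, a negative constant equals the true coefficient plus 2^N, so
   the unsigned highpart product computed below is too large by exactly
   OP0 (mod 2^N).  The sign_adjust subtraction of OP0 after the multiply
   corrects for that.  */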
3933 /* See whether shift/add multiplication is cheap enough. */
3934 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3935 max_cost - extra_cost))
3937 /* See whether the specialized multiplication optabs are
3938 cheaper than the shift/add version. */
3939 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3940 alg.cost.cost + extra_cost);
3941 if (tem)
3942 return tem;
3944 tem = convert_to_mode (wider_mode, op0, unsignedp);
3945 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3946 tem = extract_high_half (mode, tem);
3948 /* Adjust result for signedness. */
3949 if (sign_adjust)
3950 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3952 return tem;
3954 return expmed_mult_highpart_optab (mode, op0, op1, target,
3955 unsignedp, max_cost);
3959 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3961 static rtx
3962 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3964 rtx result, temp, shift;
3965 rtx_code_label *label;
3966 int logd;
3967 int prec = GET_MODE_PRECISION (mode);
3969 logd = floor_log2 (d);
3970 result = gen_reg_rtx (mode);
3972 /* Avoid conditional branches when they're expensive. */
3973 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3974 && optimize_insn_for_speed_p ())
3976 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3977 mode, 0, -1);
3978 if (signmask)
3980 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3981 signmask = force_reg (mode, signmask);
3982 shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
3984 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3985 which instruction sequence to use. If logical right shifts
3986 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3987 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
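/* Editorial illustration of the LSHIFTRT sequence (not from the original
   sources): for d = 4 (logd = 2) and x = -5, the logically shifted sign
   mask is 3, and ((-5 + 3) & 3) - 3 == -1, which matches C's truncating
   -5 % 4.  */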
3989 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3990 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3991 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3992 > COSTS_N_INSNS (2)))
3994 temp = expand_binop (mode, xor_optab, op0, signmask,
3995 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3996 temp = expand_binop (mode, sub_optab, temp, signmask,
3997 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3998 temp = expand_binop (mode, and_optab, temp,
3999 gen_int_mode (masklow, mode),
4000 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4001 temp = expand_binop (mode, xor_optab, temp, signmask,
4002 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4003 temp = expand_binop (mode, sub_optab, temp, signmask,
4004 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4006 else
4008 signmask = expand_binop (mode, lshr_optab, signmask, shift,
4009 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4010 signmask = force_reg (mode, signmask);
4012 temp = expand_binop (mode, add_optab, op0, signmask,
4013 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4014 temp = expand_binop (mode, and_optab, temp,
4015 gen_int_mode (masklow, mode),
4016 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4017 temp = expand_binop (mode, sub_optab, temp, signmask,
4018 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4020 return temp;
4024 /* Mask contains the mode's signbit and the significant bits of the
4025 modulus. By including the signbit in the operation, many targets
4026 can avoid an explicit compare operation in the following comparison
4027 against zero. */
4028 wide_int mask = wi::mask (logd, false, prec);
4029 mask = wi::set_bit (mask, prec - 1);
4031 temp = expand_binop (mode, and_optab, op0,
4032 immed_wide_int_const (mask, mode),
4033 result, 1, OPTAB_LIB_WIDEN);
4034 if (temp != result)
4035 emit_move_insn (result, temp);
4037 label = gen_label_rtx ();
4038 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4040 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4041 0, OPTAB_LIB_WIDEN);
4043 mask = wi::mask (logd, true, prec);
4044 temp = expand_binop (mode, ior_optab, temp,
4045 immed_wide_int_const (mask, mode),
4046 result, 1, OPTAB_LIB_WIDEN);
4047 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4048 0, OPTAB_LIB_WIDEN);
4049 if (temp != result)
4050 emit_move_insn (result, temp);
4051 emit_label (label);
4052 return result;
4055 /* Expand signed division of OP0 by a power of two D in mode MODE.
4056 This routine is only called for positive values of D. */
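/* Editorial illustration (not from the original sources): each variant
   below conditionally adds D - 1 to negative operands before the
   arithmetic shift so that the shift rounds toward zero.  E.g. for
   x = -7 and d = 4, (-7 + 3) >> 2 == -1, matching C's truncating -7 / 4,
   whereas -7 >> 2 alone would give -2.  */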
4058 static rtx
4059 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4061 rtx temp;
4062 rtx_code_label *label;
4063 int logd;
4065 logd = floor_log2 (d);
4067 if (d == 2
4068 && BRANCH_COST (optimize_insn_for_speed_p (),
4069 false) >= 1)
4071 temp = gen_reg_rtx (mode);
4072 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4073 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4074 0, OPTAB_LIB_WIDEN);
4075 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4078 if (HAVE_conditional_move
4079 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4081 rtx temp2;
4083 start_sequence ();
4084 temp2 = copy_to_mode_reg (mode, op0);
4085 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4086 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4087 temp = force_reg (mode, temp);
4089 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
4090 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
4091 mode, temp, temp2, mode, 0);
4092 if (temp2)
4094 rtx_insn *seq = get_insns ();
4095 end_sequence ();
4096 emit_insn (seq);
4097 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4099 end_sequence ();
4102 if (BRANCH_COST (optimize_insn_for_speed_p (),
4103 false) >= 2)
4105 int ushift = GET_MODE_BITSIZE (mode) - logd;
4107 temp = gen_reg_rtx (mode);
4108 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4109 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4110 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4111 > COSTS_N_INSNS (1))
4112 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
4113 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4114 else
4115 temp = expand_shift (RSHIFT_EXPR, mode, temp,
4116 ushift, NULL_RTX, 1);
4117 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4118 0, OPTAB_LIB_WIDEN);
4119 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4122 label = gen_label_rtx ();
4123 temp = copy_to_mode_reg (mode, op0);
4124 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4125 expand_inc (temp, gen_int_mode (d - 1, mode));
4126 emit_label (label);
4127 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4130 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4131 if that is convenient, and returning where the result is.
4132 You may request either the quotient or the remainder as the result;
4133 specify REM_FLAG nonzero to get the remainder.
4135 CODE is the expression code for which kind of division this is;
4136 it controls how rounding is done. MODE is the machine mode to use.
4137 UNSIGNEDP nonzero means do unsigned division. */
4139 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4140 and then correct it by or'ing in missing high bits
4141 if result of ANDI is nonzero.
4142 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4143 This could optimize to a bfexts instruction.
4144 But C doesn't use these operations, so their optimizations are
4145 left for later. */
4146 /* ??? For modulo, we don't actually need the highpart of the first product,
4147 the low part will do nicely. And for small divisors, the second multiply
4148 can also be a low-part only multiply or even be completely left out.
4149 E.g. to calculate the remainder of a division by 3 with a 32 bit
4150 multiply, multiply with 0x55555556 and extract the upper two bits;
4151 the result is exact for inputs up to 0x1fffffff.
4152 The input range can be reduced by using cross-sum rules.
4153 For odd divisors >= 3, the following table gives right shift counts
4154 so that if a number is shifted by an integer multiple of the given
4155 amount, the remainder stays the same:
4156 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4157 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4158 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4159 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4160 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4162 Cross-sum rules for even numbers can be derived by leaving as many bits
4163 to the right alone as the divisor has zeros to the right.
4164 E.g. if x is an unsigned 32 bit number:
4165 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
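/* Editorial illustration of the division-by-3 remark above (an assumed
   reading of "the upper two bits", not from the original sources): taking
   them as the top two bits of the low 32-bit half of the product,
   x % 3 == ((x * 0x55555556) & 0xffffffff) >> 30 within the stated range;
   e.g. x = 7 gives a low word of 0x5555555a (top bits 01 == 1) and x = 8
   gives 0xaaaaaab0 (top bits 10 == 2).  */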
4169 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4170 rtx op0, rtx op1, rtx target, int unsignedp)
4172 machine_mode compute_mode;
4173 rtx tquotient;
4174 rtx quotient = 0, remainder = 0;
4175 rtx_insn *last;
4176 rtx_insn *insn;
4177 optab optab1, optab2;
4178 int op1_is_constant, op1_is_pow2 = 0;
4179 int max_cost, extra_cost;
4180 static HOST_WIDE_INT last_div_const = 0;
4181 bool speed = optimize_insn_for_speed_p ();
4183 op1_is_constant = CONST_INT_P (op1);
4184 if (op1_is_constant)
4186 wide_int ext_op1 = rtx_mode_t (op1, mode);
4187 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4188 || (! unsignedp
4189 && wi::popcount (wi::neg (ext_op1)) == 1));
4193 This is the structure of expand_divmod:
4195 First comes code to fix up the operands so we can perform the operations
4196 correctly and efficiently.
4198 Second comes a switch statement with code specific for each rounding mode.
4199 For some special operands this code emits all RTL for the desired
4200 operation, for other cases, it generates only a quotient and stores it in
4201 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4202 to indicate that it has not done anything.
4204 Last comes code that finishes the operation. If QUOTIENT is set and
4205 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4206 QUOTIENT is not set, it is computed using trunc rounding.
4208 We try to generate special code for division and remainder when OP1 is a
4209 constant. If |OP1| = 2**n we can use shifts and some other fast
4210 operations. For other values of OP1, we compute a carefully selected
4211 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4212 by m.
4214 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4215 half of the product. Different strategies for generating the product are
4216 implemented in expmed_mult_highpart.
4218 If what we actually want is the remainder, we generate that by another
4219 by-constant multiplication and a subtraction. */
4221 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4222 code below will malfunction if we are, so check here and handle
4223 the special case if so. */
4224 if (op1 == const1_rtx)
4225 return rem_flag ? const0_rtx : op0;
4227 /* When dividing by -1, we could get an overflow.
4228 negv_optab can handle overflows. */
4229 if (! unsignedp && op1 == constm1_rtx)
4231 if (rem_flag)
4232 return const0_rtx;
4233 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4234 ? negv_optab : neg_optab, op0, target, 0);
4237 if (target
4238 /* Don't use the function value register as a target
4239 since we have to read it as well as write it,
4240 and function-inlining gets confused by this. */
4241 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4242 /* Don't clobber an operand while doing a multi-step calculation. */
4243 || ((rem_flag || op1_is_constant)
4244 && (reg_mentioned_p (target, op0)
4245 || (MEM_P (op0) && MEM_P (target))))
4246 || reg_mentioned_p (target, op1)
4247 || (MEM_P (op1) && MEM_P (target))))
4248 target = 0;
4250 /* Get the mode in which to perform this computation. Normally it will
4251 be MODE, but sometimes we can't do the desired operation in MODE.
4252 If so, pick a wider mode in which we can do the operation. Convert
4253 to that mode at the start to avoid repeated conversions.
4255 First see what operations we need. These depend on the expression
4256 we are evaluating. (We assume that divxx3 insns exist under the
4257 same conditions that modxx3 insns and that these insns don't normally
4258 fail. If these assumptions are not correct, we may generate less
4259 efficient code in some cases.)
4261 Then see if we find a mode in which we can open-code that operation
4262 (either a division, modulus, or shift). Finally, check for the smallest
4263 mode for which we can do the operation with a library call. */
4265 /* We might want to refine this now that we have division-by-constant
4266 optimization. Since expmed_mult_highpart tries so many variants, it is
4267 not straightforward to generalize this. Maybe we should make an array
4268 of possible modes in init_expmed? Save this for GCC 2.7. */
4270 optab1 = (op1_is_pow2
4271 ? (unsignedp ? lshr_optab : ashr_optab)
4272 : (unsignedp ? udiv_optab : sdiv_optab));
4273 optab2 = (op1_is_pow2 ? optab1
4274 : (unsignedp ? udivmod_optab : sdivmod_optab));
4276 FOR_EACH_MODE_FROM (compute_mode, mode)
4277 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4278 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4279 break;
4281 if (compute_mode == VOIDmode)
4282 FOR_EACH_MODE_FROM (compute_mode, mode)
4283 if (optab_libfunc (optab1, compute_mode)
4284 || optab_libfunc (optab2, compute_mode))
4285 break;
4287 /* If we still couldn't find a mode, use MODE, but expand_binop will
4288 probably die. */
4289 if (compute_mode == VOIDmode)
4290 compute_mode = mode;
4292 if (target && GET_MODE (target) == compute_mode)
4293 tquotient = target;
4294 else
4295 tquotient = gen_reg_rtx (compute_mode);
4297 #if 0
4298 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4299 (mode), and thereby get better code when OP1 is a constant. Do that
4300 later. It will require going over all usages of SIZE below. */
4301 size = GET_MODE_BITSIZE (mode);
4302 #endif
4304 /* Only deduct something for a REM if the last divide done was
4305 for a different constant. Then set the constant of the last
4306 divide. */
4307 max_cost = (unsignedp
4308 ? udiv_cost (speed, compute_mode)
4309 : sdiv_cost (speed, compute_mode));
4310 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4311 && INTVAL (op1) == last_div_const))
4312 max_cost -= (mul_cost (speed, compute_mode)
4313 + add_cost (speed, compute_mode));
4315 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4317 /* Now convert to the best mode to use. */
4318 if (compute_mode != mode)
4320 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4321 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4323 /* convert_modes may have placed op1 into a register, so we
4324 must recompute the following. */
4325 op1_is_constant = CONST_INT_P (op1);
4326 if (op1_is_constant)
4328 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4329 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4330 || (! unsignedp
4331 && wi::popcount (wi::neg (ext_op1)) == 1));
4333 else
4334 op1_is_pow2 = 0;
4337 /* If one of the operands is a volatile MEM, copy it into a register. */
4339 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4340 op0 = force_reg (compute_mode, op0);
4341 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4342 op1 = force_reg (compute_mode, op1);
4344 /* If we need the remainder or if OP1 is constant, we need to
4345 put OP0 in a register in case it has any queued subexpressions. */
4346 if (rem_flag || op1_is_constant)
4347 op0 = force_reg (compute_mode, op0);
4349 last = get_last_insn ();
4351 /* Promote floor rounding to trunc rounding for unsigned operations. */
4352 if (unsignedp)
4354 if (code == FLOOR_DIV_EXPR)
4355 code = TRUNC_DIV_EXPR;
4356 if (code == FLOOR_MOD_EXPR)
4357 code = TRUNC_MOD_EXPR;
4358 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4359 code = TRUNC_DIV_EXPR;
4362 if (op1 != const0_rtx)
4363 switch (code)
4365 case TRUNC_MOD_EXPR:
4366 case TRUNC_DIV_EXPR:
4367 if (op1_is_constant)
4369 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4370 int size = GET_MODE_BITSIZE (int_mode);
4371 if (unsignedp)
4373 unsigned HOST_WIDE_INT mh, ml;
4374 int pre_shift, post_shift;
4375 int dummy;
4376 wide_int wd = rtx_mode_t (op1, int_mode);
4377 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4379 if (wi::popcount (wd) == 1)
4381 pre_shift = floor_log2 (d);
4382 if (rem_flag)
4384 unsigned HOST_WIDE_INT mask
4385 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4386 remainder
4387 = expand_binop (int_mode, and_optab, op0,
4388 gen_int_mode (mask, int_mode),
4389 remainder, 1,
4390 OPTAB_LIB_WIDEN);
4391 if (remainder)
4392 return gen_lowpart (mode, remainder);
4394 quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4395 pre_shift, tquotient, 1);
4397 else if (size <= HOST_BITS_PER_WIDE_INT)
4399 if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4401 /* Most significant bit of divisor is set; emit an scc
4402 insn. */
4403 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4404 int_mode, 1, 1);
4406 else
4408 /* Find a suitable multiplier and right shift count
4409 instead of multiplying with D. */
4411 mh = choose_multiplier (d, size, size,
4412 &ml, &post_shift, &dummy);
4414 /* If the suggested multiplier is more than SIZE bits,
4415 we can do better for even divisors, using an
4416 initial right shift. */
4417 if (mh != 0 && (d & 1) == 0)
4419 pre_shift = ctz_or_zero (d);
4420 mh = choose_multiplier (d >> pre_shift, size,
4421 size - pre_shift,
4422 &ml, &post_shift, &dummy);
4423 gcc_assert (!mh);
4425 else
4426 pre_shift = 0;
4428 if (mh != 0)
4430 rtx t1, t2, t3, t4;
4432 if (post_shift - 1 >= BITS_PER_WORD)
4433 goto fail1;
4435 extra_cost
4436 = (shift_cost (speed, int_mode, post_shift - 1)
4437 + shift_cost (speed, int_mode, 1)
4438 + 2 * add_cost (speed, int_mode));
4439 t1 = expmed_mult_highpart
4440 (int_mode, op0, gen_int_mode (ml, int_mode),
4441 NULL_RTX, 1, max_cost - extra_cost);
4442 if (t1 == 0)
4443 goto fail1;
4444 t2 = force_operand (gen_rtx_MINUS (int_mode,
4445 op0, t1),
4446 NULL_RTX);
4447 t3 = expand_shift (RSHIFT_EXPR, int_mode,
4448 t2, 1, NULL_RTX, 1);
4449 t4 = force_operand (gen_rtx_PLUS (int_mode,
4450 t1, t3),
4451 NULL_RTX);
4452 quotient = expand_shift
4453 (RSHIFT_EXPR, int_mode, t4,
4454 post_shift - 1, tquotient, 1);
4456 else
4458 rtx t1, t2;
4460 if (pre_shift >= BITS_PER_WORD
4461 || post_shift >= BITS_PER_WORD)
4462 goto fail1;
4464 t1 = expand_shift
4465 (RSHIFT_EXPR, int_mode, op0,
4466 pre_shift, NULL_RTX, 1);
4467 extra_cost
4468 = (shift_cost (speed, int_mode, pre_shift)
4469 + shift_cost (speed, int_mode, post_shift));
4470 t2 = expmed_mult_highpart
4471 (int_mode, t1,
4472 gen_int_mode (ml, int_mode),
4473 NULL_RTX, 1, max_cost - extra_cost);
4474 if (t2 == 0)
4475 goto fail1;
4476 quotient = expand_shift
4477 (RSHIFT_EXPR, int_mode, t2,
4478 post_shift, tquotient, 1);
4482 else /* Too wide mode to use tricky code */
4483 break;
4485 insn = get_last_insn ();
4486 if (insn != last)
4487 set_dst_reg_note (insn, REG_EQUAL,
4488 gen_rtx_UDIV (int_mode, op0, op1),
4489 quotient);
4491 else /* TRUNC_DIV, signed */
4493 unsigned HOST_WIDE_INT ml;
4494 int lgup, post_shift;
4495 rtx mlr;
4496 HOST_WIDE_INT d = INTVAL (op1);
4497 unsigned HOST_WIDE_INT abs_d;
4499 /* Not prepared to handle division/remainder by
4500 0xffffffffffffffff8000000000000000 etc. */
4501 if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4502 break;
4504 /* Since d might be INT_MIN, we have to cast to
4505 unsigned HOST_WIDE_INT before negating to avoid
4506 undefined signed overflow. */
4507 abs_d = (d >= 0
4508 ? (unsigned HOST_WIDE_INT) d
4509 : - (unsigned HOST_WIDE_INT) d);
4511 /* n rem d = n rem -d */
4512 if (rem_flag && d < 0)
4514 d = abs_d;
4515 op1 = gen_int_mode (abs_d, int_mode);
4518 if (d == 1)
4519 quotient = op0;
4520 else if (d == -1)
4521 quotient = expand_unop (int_mode, neg_optab, op0,
4522 tquotient, 0);
4523 else if (size <= HOST_BITS_PER_WIDE_INT
4524 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4526 /* This case is not handled correctly below. */
4527 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4528 int_mode, 1, 1);
4529 if (quotient == 0)
4530 goto fail1;
4532 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4533 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4534 && (rem_flag
4535 ? smod_pow2_cheap (speed, int_mode)
4536 : sdiv_pow2_cheap (speed, int_mode))
4537 /* We assume that the cheap metric is true if the
4538 optab has an expander for this mode. */
4539 && ((optab_handler ((rem_flag ? smod_optab
4540 : sdiv_optab),
4541 int_mode)
4542 != CODE_FOR_nothing)
4543 || (optab_handler (sdivmod_optab, int_mode)
4544 != CODE_FOR_nothing)))
4546 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4548 if (rem_flag)
4550 remainder = expand_smod_pow2 (int_mode, op0, d);
4551 if (remainder)
4552 return gen_lowpart (mode, remainder);
4555 if (sdiv_pow2_cheap (speed, int_mode)
4556 && ((optab_handler (sdiv_optab, int_mode)
4557 != CODE_FOR_nothing)
4558 || (optab_handler (sdivmod_optab, int_mode)
4559 != CODE_FOR_nothing)))
4560 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4561 int_mode, op0,
4562 gen_int_mode (abs_d,
4563 int_mode),
4564 NULL_RTX, 0);
4565 else
4566 quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4568 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4569 negate the quotient. */
4570 if (d < 0)
4572 insn = get_last_insn ();
4573 if (insn != last
4574 && abs_d < (HOST_WIDE_INT_1U
4575 << (HOST_BITS_PER_WIDE_INT - 1)))
4576 set_dst_reg_note (insn, REG_EQUAL,
4577 gen_rtx_DIV (int_mode, op0,
4578 gen_int_mode
4579 (abs_d,
4580 int_mode)),
4581 quotient);
4583 quotient = expand_unop (int_mode, neg_optab,
4584 quotient, quotient, 0);
4587 else if (size <= HOST_BITS_PER_WIDE_INT)
4589 choose_multiplier (abs_d, size, size - 1,
4590 &ml, &post_shift, &lgup);
4591 if (ml < HOST_WIDE_INT_1U << (size - 1))
4593 rtx t1, t2, t3;
4595 if (post_shift >= BITS_PER_WORD
4596 || size - 1 >= BITS_PER_WORD)
4597 goto fail1;
4599 extra_cost = (shift_cost (speed, int_mode, post_shift)
4600 + shift_cost (speed, int_mode, size - 1)
4601 + add_cost (speed, int_mode));
4602 t1 = expmed_mult_highpart
4603 (int_mode, op0, gen_int_mode (ml, int_mode),
4604 NULL_RTX, 0, max_cost - extra_cost);
4605 if (t1 == 0)
4606 goto fail1;
4607 t2 = expand_shift
4608 (RSHIFT_EXPR, int_mode, t1,
4609 post_shift, NULL_RTX, 0);
4610 t3 = expand_shift
4611 (RSHIFT_EXPR, int_mode, op0,
4612 size - 1, NULL_RTX, 0);
4613 if (d < 0)
4614 quotient
4615 = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4616 tquotient);
4617 else
4618 quotient
4619 = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4620 tquotient);
4622 else
4624 rtx t1, t2, t3, t4;
4626 if (post_shift >= BITS_PER_WORD
4627 || size - 1 >= BITS_PER_WORD)
4628 goto fail1;
4630 ml |= HOST_WIDE_INT_M1U << (size - 1);
4631 mlr = gen_int_mode (ml, int_mode);
4632 extra_cost = (shift_cost (speed, int_mode, post_shift)
4633 + shift_cost (speed, int_mode, size - 1)
4634 + 2 * add_cost (speed, int_mode));
4635 t1 = expmed_mult_highpart (int_mode, op0, mlr,
4636 NULL_RTX, 0,
4637 max_cost - extra_cost);
4638 if (t1 == 0)
4639 goto fail1;
4640 t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4641 NULL_RTX);
4642 t3 = expand_shift
4643 (RSHIFT_EXPR, int_mode, t2,
4644 post_shift, NULL_RTX, 0);
4645 t4 = expand_shift
4646 (RSHIFT_EXPR, int_mode, op0,
4647 size - 1, NULL_RTX, 0);
4648 if (d < 0)
4649 quotient
4650 = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4651 tquotient);
4652 else
4653 quotient
4654 = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4655 tquotient);
4658 else /* Too wide mode to use tricky code */
4659 break;
4661 insn = get_last_insn ();
4662 if (insn != last)
4663 set_dst_reg_note (insn, REG_EQUAL,
4664 gen_rtx_DIV (int_mode, op0, op1),
4665 quotient);
4667 break;
4669 fail1:
4670 delete_insns_since (last);
4671 break;
4673 case FLOOR_DIV_EXPR:
4674 case FLOOR_MOD_EXPR:
4675 /* We will come here only for signed operations. */
4676 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4678 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4679 int size = GET_MODE_BITSIZE (int_mode);
4680 unsigned HOST_WIDE_INT mh, ml;
4681 int pre_shift, lgup, post_shift;
4682 HOST_WIDE_INT d = INTVAL (op1);
4684 if (d > 0)
4686 /* We could just as easily deal with negative constants here,
4687 but it does not seem worth the trouble for GCC 2.6. */
4688 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4690 pre_shift = floor_log2 (d);
4691 if (rem_flag)
4693 unsigned HOST_WIDE_INT mask
4694 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4695 remainder = expand_binop
4696 (int_mode, and_optab, op0,
4697 gen_int_mode (mask, int_mode),
4698 remainder, 0, OPTAB_LIB_WIDEN);
4699 if (remainder)
4700 return gen_lowpart (mode, remainder);
4702 quotient = expand_shift
4703 (RSHIFT_EXPR, int_mode, op0,
4704 pre_shift, tquotient, 0);
4706 else
4708 rtx t1, t2, t3, t4;
4710 mh = choose_multiplier (d, size, size - 1,
4711 &ml, &post_shift, &lgup);
4712 gcc_assert (!mh);
4714 if (post_shift < BITS_PER_WORD
4715 && size - 1 < BITS_PER_WORD)
4717 t1 = expand_shift
4718 (RSHIFT_EXPR, int_mode, op0,
4719 size - 1, NULL_RTX, 0);
4720 t2 = expand_binop (int_mode, xor_optab, op0, t1,
4721 NULL_RTX, 0, OPTAB_WIDEN);
4722 extra_cost = (shift_cost (speed, int_mode, post_shift)
4723 + shift_cost (speed, int_mode, size - 1)
4724 + 2 * add_cost (speed, int_mode));
4725 t3 = expmed_mult_highpart
4726 (int_mode, t2, gen_int_mode (ml, int_mode),
4727 NULL_RTX, 1, max_cost - extra_cost);
4728 if (t3 != 0)
4730 t4 = expand_shift
4731 (RSHIFT_EXPR, int_mode, t3,
4732 post_shift, NULL_RTX, 1);
4733 quotient = expand_binop (int_mode, xor_optab,
4734 t4, t1, tquotient, 0,
4735 OPTAB_WIDEN);
4740 else
4742 rtx nsign, t1, t2, t3, t4;
4743 t1 = force_operand (gen_rtx_PLUS (int_mode,
4744 op0, constm1_rtx), NULL_RTX);
4745 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4746 0, OPTAB_WIDEN);
4747 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4748 size - 1, NULL_RTX, 0);
4749 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4750 NULL_RTX);
4751 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4752 NULL_RTX, 0);
4753 if (t4)
4755 rtx t5;
4756 t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4757 NULL_RTX, 0);
4758 quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4759 tquotient);
4764 if (quotient != 0)
4765 break;
4766 delete_insns_since (last);
4768 /* Try using an instruction that produces both the quotient and
4769 remainder, using truncation. We can easily compensate the quotient
4770 or remainder to get floor rounding, once we have the remainder.
4771 Notice that we also compute the final remainder value here,
4772 and return the result right away. */
4773 if (target == 0 || GET_MODE (target) != compute_mode)
4774 target = gen_reg_rtx (compute_mode);
4776 if (rem_flag)
4778 remainder
4779 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4780 quotient = gen_reg_rtx (compute_mode);
4782 else
4784 quotient
4785 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4786 remainder = gen_reg_rtx (compute_mode);
4789 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4790 quotient, remainder, 0))
4792 /* This could be computed with a branch-less sequence.
4793 Save that for later. */
4794 rtx tem;
4795 rtx_code_label *label = gen_label_rtx ();
4796 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4797 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4798 NULL_RTX, 0, OPTAB_WIDEN);
4799 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4800 expand_dec (quotient, const1_rtx);
4801 expand_inc (remainder, op1);
4802 emit_label (label);
4803 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4806 /* No luck with division elimination or divmod. Have to do it
4807 by conditionally adjusting op0 *and* the result. */
4809 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4810 rtx adjusted_op0;
4811 rtx tem;
4813 quotient = gen_reg_rtx (compute_mode);
4814 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4815 label1 = gen_label_rtx ();
4816 label2 = gen_label_rtx ();
4817 label3 = gen_label_rtx ();
4818 label4 = gen_label_rtx ();
4819 label5 = gen_label_rtx ();
4820 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4821 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4822 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4823 quotient, 0, OPTAB_LIB_WIDEN);
4824 if (tem != quotient)
4825 emit_move_insn (quotient, tem);
4826 emit_jump_insn (targetm.gen_jump (label5));
4827 emit_barrier ();
4828 emit_label (label1);
4829 expand_inc (adjusted_op0, const1_rtx);
4830 emit_jump_insn (targetm.gen_jump (label4));
4831 emit_barrier ();
4832 emit_label (label2);
4833 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4834 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4835 quotient, 0, OPTAB_LIB_WIDEN);
4836 if (tem != quotient)
4837 emit_move_insn (quotient, tem);
4838 emit_jump_insn (targetm.gen_jump (label5));
4839 emit_barrier ();
4840 emit_label (label3);
4841 expand_dec (adjusted_op0, const1_rtx);
4842 emit_label (label4);
4843 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4844 quotient, 0, OPTAB_LIB_WIDEN);
4845 if (tem != quotient)
4846 emit_move_insn (quotient, tem);
4847 expand_dec (quotient, const1_rtx);
4848 emit_label (label5);
4850 break;
4852 case CEIL_DIV_EXPR:
4853 case CEIL_MOD_EXPR:
4854 if (unsignedp)
4856 if (op1_is_constant
4857 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4858 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4859 || INTVAL (op1) >= 0))
4861 scalar_int_mode int_mode
4862 = as_a <scalar_int_mode> (compute_mode);
4863 rtx t1, t2, t3;
4864 unsigned HOST_WIDE_INT d = INTVAL (op1);
4865 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4866 floor_log2 (d), tquotient, 1);
4867 t2 = expand_binop (int_mode, and_optab, op0,
4868 gen_int_mode (d - 1, int_mode),
4869 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4870 t3 = gen_reg_rtx (int_mode);
4871 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4872 if (t3 == 0)
4874 rtx_code_label *lab;
4875 lab = gen_label_rtx ();
4876 do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4877 expand_inc (t1, const1_rtx);
4878 emit_label (lab);
4879 quotient = t1;
4881 else
4882 quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4883 tquotient);
4884 break;
4887 /* Try using an instruction that produces both the quotient and
4888 remainder, using truncation. We can easily compensate the
4889 quotient or remainder to get ceiling rounding, once we have the
4890 remainder. Notice that we also compute the final remainder
4891 value here, and return the result right away. */
4892 if (target == 0 || GET_MODE (target) != compute_mode)
4893 target = gen_reg_rtx (compute_mode);
4895 if (rem_flag)
4897 remainder = (REG_P (target)
4898 ? target : gen_reg_rtx (compute_mode));
4899 quotient = gen_reg_rtx (compute_mode);
4901 else
4903 quotient = (REG_P (target)
4904 ? target : gen_reg_rtx (compute_mode));
4905 remainder = gen_reg_rtx (compute_mode);
4908 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4909 remainder, 1))
4911 /* This could be computed with a branch-less sequence.
4912 Save that for later. */
4913 rtx_code_label *label = gen_label_rtx ();
4914 do_cmp_and_jump (remainder, const0_rtx, EQ,
4915 compute_mode, label);
4916 expand_inc (quotient, const1_rtx);
4917 expand_dec (remainder, op1);
4918 emit_label (label);
4919 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4922 /* No luck with division elimination or divmod. Have to do it
4923 by conditionally adjusting op0 *and* the result. */
4925 rtx_code_label *label1, *label2;
4926 rtx adjusted_op0, tem;
4928 quotient = gen_reg_rtx (compute_mode);
4929 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4930 label1 = gen_label_rtx ();
4931 label2 = gen_label_rtx ();
4932 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4933 compute_mode, label1);
4934 emit_move_insn (quotient, const0_rtx);
4935 emit_jump_insn (targetm.gen_jump (label2));
4936 emit_barrier ();
4937 emit_label (label1);
4938 expand_dec (adjusted_op0, const1_rtx);
4939 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4940 quotient, 1, OPTAB_LIB_WIDEN);
4941 if (tem != quotient)
4942 emit_move_insn (quotient, tem);
4943 expand_inc (quotient, const1_rtx);
4944 emit_label (label2);
4947 else /* signed */
4949 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4950 && INTVAL (op1) >= 0)
4952 /* This is extremely similar to the code for the unsigned case
4953 above. For 2.7 we should merge these variants, but for
4954 2.6.1 I don't want to touch the code for unsigned since that
4955 gets used in C. The signed case will only be used by other
4956 languages (Ada). */
4958 rtx t1, t2, t3;
4959 unsigned HOST_WIDE_INT d = INTVAL (op1);
4960 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4961 floor_log2 (d), tquotient, 0);
4962 t2 = expand_binop (compute_mode, and_optab, op0,
4963 gen_int_mode (d - 1, compute_mode),
4964 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4965 t3 = gen_reg_rtx (compute_mode);
4966 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4967 compute_mode, 1, 1);
4968 if (t3 == 0)
4970 rtx_code_label *lab;
4971 lab = gen_label_rtx ();
4972 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4973 expand_inc (t1, const1_rtx);
4974 emit_label (lab);
4975 quotient = t1;
4977 else
4978 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4979 t1, t3),
4980 tquotient);
4981 break;
4984 /* Try using an instruction that produces both the quotient and
4985 remainder, using truncation. We can easily compensate the
4986 quotient or remainder to get ceiling rounding, once we have the
4987 remainder. Notice that we also compute the final remainder
4988 value here, and return the result right away. */
4989 if (target == 0 || GET_MODE (target) != compute_mode)
4990 target = gen_reg_rtx (compute_mode);
4991 if (rem_flag)
4993 remainder = (REG_P (target)
4994 ? target : gen_reg_rtx (compute_mode));
4995 quotient = gen_reg_rtx (compute_mode);
4997 else
4999 quotient = (REG_P (target)
5000 ? target : gen_reg_rtx (compute_mode));
5001 remainder = gen_reg_rtx (compute_mode);
5004 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5005 remainder, 0))
5007 /* This could be computed with a branch-less sequence.
5008 Save that for later. */
5009 rtx tem;
5010 rtx_code_label *label = gen_label_rtx ();
5011 do_cmp_and_jump (remainder, const0_rtx, EQ,
5012 compute_mode, label);
5013 tem = expand_binop (compute_mode, xor_optab, op0, op1,
5014 NULL_RTX, 0, OPTAB_WIDEN);
5015 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5016 expand_inc (quotient, const1_rtx);
5017 expand_dec (remainder, op1);
5018 emit_label (label);
5019 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5022 /* No luck with division elimination or divmod. Have to do it
5023 by conditionally adjusting op0 *and* the result. */
5025 rtx_code_label *label1, *label2, *label3, *label4, *label5;
5026 rtx adjusted_op0;
5027 rtx tem;
5029 quotient = gen_reg_rtx (compute_mode);
5030 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5031 label1 = gen_label_rtx ();
5032 label2 = gen_label_rtx ();
5033 label3 = gen_label_rtx ();
5034 label4 = gen_label_rtx ();
5035 label5 = gen_label_rtx ();
5036 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5037 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5038 compute_mode, label1);
5039 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5040 quotient, 0, OPTAB_LIB_WIDEN);
5041 if (tem != quotient)
5042 emit_move_insn (quotient, tem);
5043 emit_jump_insn (targetm.gen_jump (label5));
5044 emit_barrier ();
5045 emit_label (label1);
5046 expand_dec (adjusted_op0, const1_rtx);
5047 emit_jump_insn (targetm.gen_jump (label4));
5048 emit_barrier ();
5049 emit_label (label2);
5050 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5051 compute_mode, label3);
5052 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5053 quotient, 0, OPTAB_LIB_WIDEN);
5054 if (tem != quotient)
5055 emit_move_insn (quotient, tem);
5056 emit_jump_insn (targetm.gen_jump (label5));
5057 emit_barrier ();
5058 emit_label (label3);
5059 expand_inc (adjusted_op0, const1_rtx);
5060 emit_label (label4);
5061 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5062 quotient, 0, OPTAB_LIB_WIDEN);
5063 if (tem != quotient)
5064 emit_move_insn (quotient, tem);
5065 expand_inc (quotient, const1_rtx);
5066 emit_label (label5);
5069 break;
5071 case EXACT_DIV_EXPR:
5072 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5074 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5075 int size = GET_MODE_BITSIZE (int_mode);
5076 HOST_WIDE_INT d = INTVAL (op1);
5077 unsigned HOST_WIDE_INT ml;
5078 int pre_shift;
5079 rtx t1;
5081 pre_shift = ctz_or_zero (d);
5082 ml = invert_mod2n (d >> pre_shift, size);
5083 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5084 pre_shift, NULL_RTX, unsignedp);
5085 quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5086 NULL_RTX, 1);
5088 insn = get_last_insn ();
5089 set_dst_reg_note (insn, REG_EQUAL,
5090 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5091 int_mode, op0, op1),
5092 quotient);
5094 break;
5096 case ROUND_DIV_EXPR:
5097 case ROUND_MOD_EXPR:
5098 if (unsignedp)
5100 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5101 rtx tem;
5102 rtx_code_label *label;
5103 label = gen_label_rtx ();
5104 quotient = gen_reg_rtx (int_mode);
5105 remainder = gen_reg_rtx (int_mode);
5106 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5108 rtx tem;
5109 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5110 quotient, 1, OPTAB_LIB_WIDEN);
5111 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5112 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5113 remainder, 1, OPTAB_LIB_WIDEN);
5115 tem = plus_constant (int_mode, op1, -1);
5116 tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5117 do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5118 expand_inc (quotient, const1_rtx);
5119 expand_dec (remainder, op1);
5120 emit_label (label);
5122 else
5124 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5125 int size = GET_MODE_BITSIZE (int_mode);
5126 rtx abs_rem, abs_op1, tem, mask;
5127 rtx_code_label *label;
5128 label = gen_label_rtx ();
5129 quotient = gen_reg_rtx (int_mode);
5130 remainder = gen_reg_rtx (int_mode);
5131 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5133 rtx tem;
5134 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5135 quotient, 0, OPTAB_LIB_WIDEN);
5136 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5137 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5138 remainder, 0, OPTAB_LIB_WIDEN);
5140 abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5141 abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5142 tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5143 1, NULL_RTX, 1);
5144 do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5145 tem = expand_binop (int_mode, xor_optab, op0, op1,
5146 NULL_RTX, 0, OPTAB_WIDEN);
5147 mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5148 size - 1, NULL_RTX, 0);
5149 tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5150 NULL_RTX, 0, OPTAB_WIDEN);
5151 tem = expand_binop (int_mode, sub_optab, tem, mask,
5152 NULL_RTX, 0, OPTAB_WIDEN);
5153 expand_inc (quotient, tem);
5154 tem = expand_binop (int_mode, xor_optab, mask, op1,
5155 NULL_RTX, 0, OPTAB_WIDEN);
5156 tem = expand_binop (int_mode, sub_optab, tem, mask,
5157 NULL_RTX, 0, OPTAB_WIDEN);
5158 expand_dec (remainder, tem);
5159 emit_label (label);
5161 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5163 default:
5164 gcc_unreachable ();
5167 if (quotient == 0)
5169 if (target && GET_MODE (target) != compute_mode)
5170 target = 0;
5172 if (rem_flag)
5174 /* Try to produce the remainder without producing the quotient.
5175 If we seem to have a divmod pattern that does not require widening,
5176 don't try widening here. We should really have a WIDEN argument
5177 to expand_twoval_binop, since what we'd really like to do here is
5178 1) try a mod insn in compute_mode
5179 2) try a divmod insn in compute_mode
5180 3) try a div insn in compute_mode and multiply-subtract to get
5181 remainder
5182 4) try the same things with widening allowed. */
5183 remainder
5184 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5185 op0, op1, target,
5186 unsignedp,
5187 ((optab_handler (optab2, compute_mode)
5188 != CODE_FOR_nothing)
5189 ? OPTAB_DIRECT : OPTAB_WIDEN));
5190 if (remainder == 0)
5192 /* No luck there. Can we do remainder and divide at once
5193 without a library call? */
5194 remainder = gen_reg_rtx (compute_mode);
5195 if (! expand_twoval_binop ((unsignedp
5196 ? udivmod_optab
5197 : sdivmod_optab),
5198 op0, op1,
5199 NULL_RTX, remainder, unsignedp))
5200 remainder = 0;
5203 if (remainder)
5204 return gen_lowpart (mode, remainder);
5207 /* Produce the quotient. Try a quotient insn, but not a library call.
5208 If we have a divmod in this mode, use it in preference to widening
5209 the div (for this test we assume it will not fail). Note that optab2
5210 is set to whichever of the two optabs the call below will use. */
5211 quotient
5212 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5213 op0, op1, rem_flag ? NULL_RTX : target,
5214 unsignedp,
5215 ((optab_handler (optab2, compute_mode)
5216 != CODE_FOR_nothing)
5217 ? OPTAB_DIRECT : OPTAB_WIDEN));
5219 if (quotient == 0)
5221 /* No luck there. Try a quotient-and-remainder insn,
5222 keeping the quotient alone. */
5223 quotient = gen_reg_rtx (compute_mode);
5224 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5225 op0, op1,
5226 quotient, NULL_RTX, unsignedp))
5228 quotient = 0;
5229 if (! rem_flag)
5230 /* Still no luck. If we are not computing the remainder,
5231 use a library call for the quotient. */
5232 quotient = sign_expand_binop (compute_mode,
5233 udiv_optab, sdiv_optab,
5234 op0, op1, target,
5235 unsignedp, OPTAB_LIB_WIDEN);
5240 if (rem_flag)
5242 if (target && GET_MODE (target) != compute_mode)
5243 target = 0;
5245 if (quotient == 0)
5247 /* No divide instruction either. Use library for remainder. */
5248 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5249 op0, op1, target,
5250 unsignedp, OPTAB_LIB_WIDEN);
5251 /* No remainder function. Try a quotient-and-remainder
5252 function, keeping the remainder. */
5253 if (!remainder)
5255 remainder = gen_reg_rtx (compute_mode);
5256 if (!expand_twoval_binop_libfunc
5257 (unsignedp ? udivmod_optab : sdivmod_optab,
5258 op0, op1,
5259 NULL_RTX, remainder,
5260 unsignedp ? UMOD : MOD))
5261 remainder = NULL_RTX;
5264 else
5266 /* We divided. Now finish doing X - Y * (X / Y). */
5267 remainder = expand_mult (compute_mode, quotient, op1,
5268 NULL_RTX, unsignedp);
5269 remainder = expand_binop (compute_mode, sub_optab, op0,
5270 remainder, target, unsignedp,
5271 OPTAB_LIB_WIDEN);
5275 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5278 /* Return a tree node with data type TYPE, describing the value of X.
5279 Usually this is a VAR_DECL, if there is no obvious better choice.
5280 X may be an expression; however, we only support those expressions
5281 generated by loop.c. */
5283 tree
5284 make_tree (tree type, rtx x)
5286 tree t;
5288 switch (GET_CODE (x))
5290 case CONST_INT:
5291 case CONST_WIDE_INT:
5292 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5293 return t;
5295 case CONST_DOUBLE:
5296 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5297 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5298 t = wide_int_to_tree (type,
5299 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5300 HOST_BITS_PER_WIDE_INT * 2));
5301 else
5302 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5304 return t;
5306 case CONST_VECTOR:
5308 unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5309 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5310 tree itype = TREE_TYPE (type);
5312 /* Build a tree with vector elements. */
5313 tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5314 unsigned int count = elts.encoded_nelts ();
5315 for (unsigned int i = 0; i < count; ++i)
5317 rtx elt = CONST_VECTOR_ELT (x, i);
5318 elts.quick_push (make_tree (itype, elt));
5321 return elts.build ();
5324 case PLUS:
5325 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5326 make_tree (type, XEXP (x, 1)));
5328 case MINUS:
5329 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5330 make_tree (type, XEXP (x, 1)));
5332 case NEG:
5333 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5335 case MULT:
5336 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5337 make_tree (type, XEXP (x, 1)));
5339 case ASHIFT:
5340 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5341 make_tree (type, XEXP (x, 1)));
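/* For the right-shift cases below, the signedness of the first
   operand's type selects between a logical and an arithmetic
   RSHIFT_EXPR, so LSHIFTRT forces an unsigned type and ASHIFTRT a
   signed one before converting the result back to TYPE.  */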
5343 case LSHIFTRT:
5344 t = unsigned_type_for (type);
5345 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5346 make_tree (t, XEXP (x, 0)),
5347 make_tree (type, XEXP (x, 1))));
5349 case ASHIFTRT:
5350 t = signed_type_for (type);
5351 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5352 make_tree (t, XEXP (x, 0)),
5353 make_tree (type, XEXP (x, 1))));
5355 case DIV:
5356 if (TREE_CODE (type) != REAL_TYPE)
5357 t = signed_type_for (type);
5358 else
5359 t = type;
5361 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5362 make_tree (t, XEXP (x, 0)),
5363 make_tree (t, XEXP (x, 1))));
5364 case UDIV:
5365 t = unsigned_type_for (type);
5366 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5367 make_tree (t, XEXP (x, 0)),
5368 make_tree (t, XEXP (x, 1))));
5370 case SIGN_EXTEND:
5371 case ZERO_EXTEND:
5372 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5373 GET_CODE (x) == ZERO_EXTEND);
5374 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5376 case CONST:
5377 return make_tree (type, XEXP (x, 0));
5379 case SYMBOL_REF:
5380 t = SYMBOL_REF_DECL (x);
5381 if (t)
5382 return fold_convert (type, build_fold_addr_expr (t));
5383 /* fall through. */
5385 default:
5386 if (CONST_POLY_INT_P (x))
5387 return wide_int_to_tree (type, const_poly_int_value (x));
5389 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5391 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5392 address mode to pointer mode. */
5393 if (POINTER_TYPE_P (type))
5394 x = convert_memory_address_addr_space
5395 (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5397 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5398 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5399 t->decl_with_rtl.rtl = x;
5401 return t;
5405 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5406 and returning TARGET.
5408 If TARGET is 0, a pseudo-register or constant is returned. */
5411 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5413 rtx tem = 0;
5415 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5416 tem = simplify_binary_operation (AND, mode, op0, op1);
5417 if (tem == 0)
5418 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5420 if (target == 0)
5421 target = tem;
5422 else if (tem != target)
5423 emit_move_insn (target, tem);
5424 return target;
5427 /* Helper function for emit_store_flag. */
5429 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5430 machine_mode mode, machine_mode compare_mode,
5431 int unsignedp, rtx x, rtx y, int normalizep,
5432 machine_mode target_mode)
5434 class expand_operand ops[4];
5435 rtx op0, comparison, subtarget;
5436 rtx_insn *last;
5437 scalar_int_mode result_mode = targetm.cstore_mode (icode);
5438 scalar_int_mode int_target_mode;
5440 last = get_last_insn ();
5441 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5442 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5443 if (!x || !y)
5445 delete_insns_since (last);
5446 return NULL_RTX;
5449 if (target_mode == VOIDmode)
5450 int_target_mode = result_mode;
5451 else
5452 int_target_mode = as_a <scalar_int_mode> (target_mode);
5453 if (!target)
5454 target = gen_reg_rtx (int_target_mode);
5456 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5458 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5459 create_fixed_operand (&ops[1], comparison);
5460 create_fixed_operand (&ops[2], x);
5461 create_fixed_operand (&ops[3], y);
5462 if (!maybe_expand_insn (icode, 4, ops))
5464 delete_insns_since (last);
5465 return NULL_RTX;
5467 subtarget = ops[0].value;
5469 /* If we are converting to a wider mode, first convert to
5470 INT_TARGET_MODE, then normalize. This produces better combining
5471 opportunities on machines that have a SIGN_EXTRACT when we are
5472 testing a single bit. This mostly benefits the 68k.
5474 If STORE_FLAG_VALUE does not have the sign bit set when
5475 interpreted in MODE, we can do this conversion as unsigned, which
5476 is usually more efficient. */
5477 if (GET_MODE_PRECISION (int_target_mode) > GET_MODE_PRECISION (result_mode))
5479 gcc_assert (GET_MODE_PRECISION (result_mode) != 1
5480 || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
5482 bool unsignedp = (STORE_FLAG_VALUE >= 0);
5483 convert_move (target, subtarget, unsignedp);
5485 op0 = target;
5486 result_mode = int_target_mode;
5488 else
5489 op0 = subtarget;
5491 /* If we want to keep subexpressions around, don't reuse our last
5492 target. */
5493 if (optimize)
5494 subtarget = 0;
5496 /* Now normalize to the proper value in MODE. Sometimes we don't
5497 have to do anything. */
5498 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5500 /* STORE_FLAG_VALUE might be the most negative number, so write
5501 the comparison this way to avoid a compile-time warning. */
5502 else if (- normalizep == STORE_FLAG_VALUE)
5503 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5505 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5506 it hard to use a value of just the sign bit due to ANSI integer
5507 constant typing rules. */
5508 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5509 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5510 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5511 normalizep == 1);
5512 else
5514 gcc_assert (STORE_FLAG_VALUE & 1);
5516 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5517 if (normalizep == -1)
5518 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5521 /* If we were converting to a smaller mode, do the conversion now. */
5522 if (int_target_mode != result_mode)
5524 convert_move (target, op0, 0);
5525 return target;
5527 else
5528 return op0;
5532 /* A subroutine of emit_store_flag only including "tricks" that do not
5533 need a recursive call. These are kept separate to avoid infinite
5534 loops. */
5536 static rtx
5537 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5538 machine_mode mode, int unsignedp, int normalizep,
5539 machine_mode target_mode)
5541 rtx subtarget;
5542 enum insn_code icode;
5543 machine_mode compare_mode;
5544 enum mode_class mclass;
5545 enum rtx_code scode;
5547 if (unsignedp)
5548 code = unsigned_condition (code);
5549 scode = swap_condition (code);
5551 /* If one operand is constant, make it the second one. Only do this
5552 if the other operand is not constant as well. */
5554 if (swap_commutative_operands_p (op0, op1))
5556 std::swap (op0, op1);
5557 code = swap_condition (code);
5560 if (mode == VOIDmode)
5561 mode = GET_MODE (op0);
5563 if (CONST_SCALAR_INT_P (op1))
5564 canonicalize_comparison (mode, &code, &op1);
5566 /* For some comparisons with 1 and -1, we can convert this to
5567 comparisons with zero. This will often produce more opportunities for
5568 store-flag insns. */
5570 switch (code)
5572 case LT:
5573 if (op1 == const1_rtx)
5574 op1 = const0_rtx, code = LE;
5575 break;
5576 case LE:
5577 if (op1 == constm1_rtx)
5578 op1 = const0_rtx, code = LT;
5579 break;
5580 case GE:
5581 if (op1 == const1_rtx)
5582 op1 = const0_rtx, code = GT;
5583 break;
5584 case GT:
5585 if (op1 == constm1_rtx)
5586 op1 = const0_rtx, code = GE;
5587 break;
5588 case GEU:
5589 if (op1 == const1_rtx)
5590 op1 = const0_rtx, code = NE;
5591 break;
5592 case LTU:
5593 if (op1 == const1_rtx)
5594 op1 = const0_rtx, code = EQ;
5595 break;
5596 default:
5597 break;
5600 /* If we are comparing a double-word integer with zero or -1, we can
5601 convert the comparison into one involving a single word. */
5602 scalar_int_mode int_mode;
5603 if (is_int_mode (mode, &int_mode)
5604 && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5605 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5607 rtx tem;
5608 if ((code == EQ || code == NE)
5609 && (op1 == const0_rtx || op1 == constm1_rtx))
5611 rtx op00, op01;
5613 /* Do a logical OR or AND of the two words and compare the
5614 result. */
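/* A double-word X is zero iff (LOW | HIGH) is zero, and X is -1 iff
   (LOW & HIGH) is -1, so a single word_mode store-flag suffices.  */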
5615 op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5616 op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5617 tem = expand_binop (word_mode,
5618 op1 == const0_rtx ? ior_optab : and_optab,
5619 op00, op01, NULL_RTX, unsignedp,
5620 OPTAB_DIRECT);
5622 if (tem != 0)
5623 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5624 unsignedp, normalizep);
5626 else if ((code == LT || code == GE) && op1 == const0_rtx)
5628 rtx op0h;
5630 /* If testing the sign bit, can just test on high word. */
5631 op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5632 subreg_highpart_offset (word_mode,
5633 int_mode));
5634 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5635 unsignedp, normalizep);
5637 else
5638 tem = NULL_RTX;
5640 if (tem)
5642 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5643 return tem;
5644 if (!target)
5645 target = gen_reg_rtx (target_mode);
5647 convert_move (target, tem,
5648 !val_signbit_known_set_p (word_mode,
5649 (normalizep ? normalizep
5650 : STORE_FLAG_VALUE)));
5651 return target;
5655 /* If this is A < 0 or A >= 0, we can do this by taking the one's
5656 complement of A (for GE) and shifting the sign bit to the low bit. */
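/* For instance, with a 32-bit A and STORE_FLAG_VALUE == 1, A < 0
   becomes (unsigned) A >> 31 and A >= 0 becomes (unsigned) ~A >> 31.  */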
5657 if (op1 == const0_rtx && (code == LT || code == GE)
5658 && is_int_mode (mode, &int_mode)
5659 && (normalizep || STORE_FLAG_VALUE == 1
5660 || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5662 scalar_int_mode int_target_mode;
5663 subtarget = target;
5665 if (!target)
5666 int_target_mode = int_mode;
5667 else
5669 /* If the result is to be wider than OP0, it is best to convert it
5670 first. If it is to be narrower, it is *incorrect* to convert it
5671 first. */
5672 int_target_mode = as_a <scalar_int_mode> (target_mode);
5673 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5675 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5676 int_mode = int_target_mode;
5680 if (int_target_mode != int_mode)
5681 subtarget = 0;
5683 if (code == GE)
5684 op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5685 ((STORE_FLAG_VALUE == 1 || normalizep)
5686 ? 0 : subtarget), 0);
5688 if (STORE_FLAG_VALUE == 1 || normalizep)
5689 /* If we are supposed to produce a 0/1 value, we want to do
5690 a logical shift from the sign bit to the low-order bit; for
5691 a -1/0 value, we do an arithmetic shift. */
5692 op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5693 GET_MODE_BITSIZE (int_mode) - 1,
5694 subtarget, normalizep != -1);
5696 if (int_mode != int_target_mode)
5697 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5699 return op0;
5702 mclass = GET_MODE_CLASS (mode);
5703 FOR_EACH_MODE_FROM (compare_mode, mode)
5705 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5706 icode = optab_handler (cstore_optab, optab_mode);
5707 if (icode != CODE_FOR_nothing)
5709 do_pending_stack_adjust ();
5710 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5711 unsignedp, op0, op1, normalizep, target_mode);
5712 if (tem)
5713 return tem;
5715 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5717 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5718 unsignedp, op1, op0, normalizep, target_mode);
5719 if (tem)
5720 return tem;
5722 break;
5726 return 0;
5729 /* Subroutine of emit_store_flag that handles cases in which the operands
5730 are scalar integers. SUBTARGET is the target to use for temporary
5731 operations and TRUEVAL is the value to store when the condition is
5732 true. All other arguments are as for emit_store_flag. */
5735 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5736 rtx op1, scalar_int_mode mode, int unsignedp,
5737 int normalizep, rtx trueval)
5739 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5740 rtx_insn *last = get_last_insn ();
5742 /* If this is an equality comparison of integers, we can try to exclusive-or
5743 (or subtract) the two operands and use a recursive call to try the
5744 comparison with zero. Don't do any of these cases if branches are
5745 very cheap. */
5747 if ((code == EQ || code == NE) && op1 != const0_rtx)
5749 rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5750 OPTAB_WIDEN);
5752 if (tem == 0)
5753 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5754 OPTAB_WIDEN);
5755 if (tem != 0)
5756 tem = emit_store_flag (target, code, tem, const0_rtx,
5757 mode, unsignedp, normalizep);
5758 if (tem != 0)
5759 return tem;
5761 delete_insns_since (last);
5764 /* For integer comparisons, try the reverse comparison. However, for
5765 small X, if we would have to extend it anyway, implementing "X != 0"
5766 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5767 rtx_code rcode = reverse_condition (code);
5768 if (can_compare_p (rcode, mode, ccp_store_flag)
5769 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5770 && code == NE
5771 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5772 && op1 == const0_rtx))
5774 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5775 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5777 /* Again, for the reverse comparison, use either an addition or a XOR. */
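/* E.g. with STORE_FLAG_VALUE == 1 and NORMALIZEP == -1, the reverse
   comparison yields 1 when CODE is false and 0 when it is true, so
   adding NORMALIZEP gives the required -1/0 result.  */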
5778 if (want_add
5779 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5780 optimize_insn_for_speed_p ()) == 0)
5782 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5783 STORE_FLAG_VALUE, target_mode);
5784 if (tem != 0)
5785 tem = expand_binop (target_mode, add_optab, tem,
5786 gen_int_mode (normalizep, target_mode),
5787 target, 0, OPTAB_WIDEN);
5788 if (tem != 0)
5789 return tem;
5791 else if (!want_add
5792 && rtx_cost (trueval, mode, XOR, 1,
5793 optimize_insn_for_speed_p ()) == 0)
5795 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5796 normalizep, target_mode);
5797 if (tem != 0)
5798 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5799 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5800 if (tem != 0)
5801 return tem;
5804 delete_insns_since (last);
5807 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5808 the constant zero. Reject all other comparisons at this point. Only
5809 do LE and GT if branches are expensive since they are expensive on
5810 2-operand machines. */
5812 if (op1 != const0_rtx
5813 || (code != EQ && code != NE
5814 && (BRANCH_COST (optimize_insn_for_speed_p (),
5815 false) <= 1 || (code != LE && code != GT))))
5816 return 0;
5818 /* Try to put the result of the comparison in the sign bit. Assume we can't
5819 do the necessary operation below. */
5821 rtx tem = 0;
5823 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5824 the sign bit set. */
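/* E.g. A = 0 gives 0 | -1 = -1 (sign bit set); A = 1 gives 1 | 0 = 1
   (clear); A = -5 gives -5 | -6, which is negative (set).  */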
5826 if (code == LE)
5828 /* This is destructive, so SUBTARGET can't be OP0. */
5829 if (rtx_equal_p (subtarget, op0))
5830 subtarget = 0;
5832 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5833 OPTAB_WIDEN);
5834 if (tem)
5835 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5836 OPTAB_WIDEN);
5839 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5840 number of bits in the mode of OP0, minus one. */
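/* E.g. for 32-bit A = 3: (3 >> 31) - 3 = -3 (sign bit set); A = 0
   gives 0 (clear); A = -7 gives -1 - (-7) = 6 (clear).  */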
5842 if (code == GT)
5844 if (rtx_equal_p (subtarget, op0))
5845 subtarget = 0;
5847 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5848 GET_MODE_BITSIZE (mode) - 1,
5849 subtarget, 0);
5850 if (tem)
5851 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5852 OPTAB_WIDEN);
5855 if (code == EQ || code == NE)
5857 /* For EQ or NE, one way to do the comparison is to apply an operation
5858 that converts the operand into a positive number if it is nonzero
5859 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5860 for NE we negate. This puts the result in the sign bit. Then we
5861 normalize with a shift, if needed.
5863 Two operations that can do the above actions are ABS and FFS, so try
5864 them. If that doesn't work, and MODE is smaller than a full word,
5865 we can use zero-extension to the wider mode (an unsigned conversion)
5866 as the operation. */
5868 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5869 that is compensated by the subsequent overflow when subtracting
5870 one / negating. */
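/* E.g. using ABS for EQ: |A| - 1 is negative only for A == 0; for
   A == INT_MIN, |A| overflows to INT_MIN and |A| - 1 wraps to INT_MAX,
   which is non-negative as required.  */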
5872 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5873 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5874 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5875 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5876 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5878 tem = convert_modes (word_mode, mode, op0, 1);
5879 mode = word_mode;
5882 if (tem != 0)
5884 if (code == EQ)
5885 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5886 0, OPTAB_WIDEN);
5887 else
5888 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5891 /* If we couldn't do it that way, for NE we can "or" the two's complement
5892 of the value with itself. For EQ, we take the one's complement of
5893 that "or", which is an extra insn, so we only handle EQ if branches
5894 are expensive. */
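/* E.g. A = 4 gives -4 | 4 = -4 (sign bit set, A != 0); A = 0 gives
   0 | 0 = 0 (clear).  */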
5896 if (tem == 0
5897 && (code == NE
5898 || BRANCH_COST (optimize_insn_for_speed_p (),
5899 false) > 1))
5901 if (rtx_equal_p (subtarget, op0))
5902 subtarget = 0;
5904 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5905 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5906 OPTAB_WIDEN);
5908 if (tem && code == EQ)
5909 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5913 if (tem && normalizep)
5914 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5915 GET_MODE_BITSIZE (mode) - 1,
5916 subtarget, normalizep == 1);
5918 if (tem)
5920 if (!target)
5922 else if (GET_MODE (tem) != target_mode)
5924 convert_move (target, tem, 0);
5925 tem = target;
5927 else if (!subtarget)
5929 emit_move_insn (target, tem);
5930 tem = target;
5933 else
5934 delete_insns_since (last);
5936 return tem;
5939 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5940 and storing in TARGET. Normally return TARGET.
5941 Return 0 if that cannot be done.
5943 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5944 it is VOIDmode, they cannot both be CONST_INT.
5946 UNSIGNEDP is for the case where we have to widen the operands
5947 to perform the operation. It says to use zero-extension.
5949 NORMALIZEP is 1 if we should convert the result to be either zero
5950 or one. NORMALIZEP is -1 if we should convert the result to be
5951 either zero or -1. If NORMALIZEP is zero, the result will be left
5952 "raw" out of the scc insn. */
5955 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5956 machine_mode mode, int unsignedp, int normalizep)
5958 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5959 enum rtx_code rcode;
5960 rtx subtarget;
5961 rtx tem, trueval;
5962 rtx_insn *last;
5964 /* If we compare constants, we shouldn't use a store-flag operation,
5965 but a constant load. We can get there via the vanilla route that
5966 usually generates a compare-branch sequence, but will in this case
5967 fold the comparison to a constant, and thus elide the branch. */
5968 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5969 return NULL_RTX;
5971 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5972 target_mode);
5973 if (tem)
5974 return tem;
5976 /* If we reached here, we can't do this with a scc insn, however there
5977 are some comparisons that can be done in other ways. Don't do any
5978 of these cases if branches are very cheap. */
5979 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5980 return 0;
5982 /* See what we need to return. We can only return a 1, -1, or the
5983 sign bit. */
5985 if (normalizep == 0)
5987 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5988 normalizep = STORE_FLAG_VALUE;
5990 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5992 else
5993 return 0;
5996 last = get_last_insn ();
5998 /* If optimizing, use different pseudo registers for each insn, instead
5999 of reusing the same pseudo. This leads to better CSE, but slows
6000 down the compiler, since there are more pseudos. */
6001 subtarget = (!optimize
6002 && (target_mode == mode)) ? target : NULL_RTX;
6003 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6005 /* For floating-point comparisons, try the reverse comparison or try
6006 changing the "orderedness" of the comparison. */
6007 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6009 enum rtx_code first_code;
6010 bool and_them;
6012 rcode = reverse_condition_maybe_unordered (code);
6013 if (can_compare_p (rcode, mode, ccp_store_flag)
6014 && (code == ORDERED || code == UNORDERED
6015 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6016 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6018 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6019 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6021 /* For the reverse comparison, use either an addition or a XOR. */
6022 if (want_add
6023 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6024 optimize_insn_for_speed_p ()) == 0)
6026 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6027 STORE_FLAG_VALUE, target_mode);
6028 if (tem)
6029 return expand_binop (target_mode, add_optab, tem,
6030 gen_int_mode (normalizep, target_mode),
6031 target, 0, OPTAB_WIDEN);
6033 else if (!want_add
6034 && rtx_cost (trueval, mode, XOR, 1,
6035 optimize_insn_for_speed_p ()) == 0)
6037 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6038 normalizep, target_mode);
6039 if (tem)
6040 return expand_binop (target_mode, xor_optab, tem, trueval,
6041 target, INTVAL (trueval) >= 0,
6042 OPTAB_WIDEN);
6046 delete_insns_since (last);
6048 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
6049 if (code == ORDERED || code == UNORDERED)
6050 return 0;
6052 and_them = split_comparison (code, mode, &first_code, &code);
6054 /* If there are no NaNs, the first comparison should always fall through.
6055 Effectively change the comparison to the other one. */
6056 if (!HONOR_NANS (mode))
6058 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6059 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6060 target_mode);
6063 if (!HAVE_conditional_move)
6064 return 0;
6066 /* Do not turn a trapping comparison into a non-trapping one. */
6067 if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6068 && flag_trapping_math)
6069 return 0;
6071 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6072 conditional move. */
6073 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6074 normalizep, target_mode);
6075 if (tem == 0)
6076 return 0;
6078 if (and_them)
6079 tem = emit_conditional_move (target, code, op0, op1, mode,
6080 tem, const0_rtx, GET_MODE (tem), 0);
6081 else
6082 tem = emit_conditional_move (target, code, op0, op1, mode,
6083 trueval, tem, GET_MODE (tem), 0);
6085 if (tem == 0)
6086 delete_insns_since (last);
6087 return tem;
6090 /* The remaining tricks only apply to integer comparisons. */
6092 scalar_int_mode int_mode;
6093 if (is_int_mode (mode, &int_mode))
6094 return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6095 unsignedp, normalizep, trueval);
6097 return 0;
6100 /* Like emit_store_flag, but always succeeds. */
6103 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6104 machine_mode mode, int unsignedp, int normalizep)
6106 rtx tem;
6107 rtx_code_label *label;
6108 rtx trueval, falseval;
6110 /* First see if emit_store_flag can do the job. */
6111 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6112 if (tem != 0)
6113 return tem;
6115 /* If one operand is constant, make it the second one. Only do this
6116 if the other operand is not constant as well. */
6117 if (swap_commutative_operands_p (op0, op1))
6119 std::swap (op0, op1);
6120 code = swap_condition (code);
6123 if (mode == VOIDmode)
6124 mode = GET_MODE (op0);
6126 if (!target)
6127 target = gen_reg_rtx (word_mode);
6129 /* If this failed, we have to do this with set/compare/jump/set code.
6130 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
6131 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6132 if (code == NE
6133 && GET_MODE_CLASS (mode) == MODE_INT
6134 && REG_P (target)
6135 && op0 == target
6136 && op1 == const0_rtx)
6138 label = gen_label_rtx ();
6139 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6140 NULL_RTX, NULL, label,
6141 profile_probability::uninitialized ());
6142 emit_move_insn (target, trueval);
6143 emit_label (label);
6144 return target;
6147 if (!REG_P (target)
6148 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6149 target = gen_reg_rtx (GET_MODE (target));
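/* The code below emits: TARGET = TRUEVAL; if (OP0 CODE OP1) goto LABEL;
   TARGET = FALSEVAL; LABEL:, possibly with CODE reversed and
   TRUEVAL/FALSEVAL swapped when only the reverse condition can be
   branched on.  */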
6151 /* Jump in the right direction if the target cannot implement CODE
6152 but can jump on its reverse condition. */
6153 falseval = const0_rtx;
6154 if (! can_compare_p (code, mode, ccp_jump)
6155 && (! FLOAT_MODE_P (mode)
6156 || code == ORDERED || code == UNORDERED
6157 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6158 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6160 enum rtx_code rcode;
6161 if (FLOAT_MODE_P (mode))
6162 rcode = reverse_condition_maybe_unordered (code);
6163 else
6164 rcode = reverse_condition (code);
6166 /* Canonicalize to UNORDERED for the libcall. */
6167 if (can_compare_p (rcode, mode, ccp_jump)
6168 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6170 falseval = trueval;
6171 trueval = const0_rtx;
6172 code = rcode;
6176 emit_move_insn (target, trueval);
6177 label = gen_label_rtx ();
6178 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6179 label, profile_probability::uninitialized ());
6181 emit_move_insn (target, falseval);
6182 emit_label (label);
6184 return target;
6187 /* Helper function for canonicalize_comparison. Swap between inclusive
6188 and exclusive ranges in order to create an equivalent comparison. See
6189 canonicalize_comparison for the possible cases. */
6191 static enum rtx_code
6192 equivalent_cmp_code (enum rtx_code code)
6194 switch (code)
6196 case GT:
6197 return GE;
6198 case GE:
6199 return GT;
6200 case LT:
6201 return LE;
6202 case LE:
6203 return LT;
6204 case GTU:
6205 return GEU;
6206 case GEU:
6207 return GTU;
6208 case LTU:
6209 return LEU;
6210 case LEU:
6211 return LTU;
6213 default:
6214 return code;
6218 /* Choose the more appropriate immediate in scalar integer comparisons. The
6219 purpose of this is to end up with an immediate which can be loaded into a
6220 register in fewer moves, if possible.
6222 For each integer comparison there exists an equivalent choice:
6223 i) a > b or a >= b + 1
6224 ii) a <= b or a < b + 1
6225 iii) a >= b or a > b - 1
6226 iv) a < b or a <= b - 1
6228 MODE is the mode of the first operand.
6229 CODE points to the comparison code.
6230 IMM points to the rtx containing the immediate. *IMM must satisfy
6231 CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
6232 on exit. */
6234 void
6235 canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
6237 if (!SCALAR_INT_MODE_P (mode))
6238 return;
6240 int to_add = 0;
6241 enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;
6243 /* Extract the immediate value from the rtx. */
6244 wide_int imm_val = rtx_mode_t (*imm, mode);
6246 if (*code == GT || *code == GTU || *code == LE || *code == LEU)
6247 to_add = 1;
6248 else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
6249 to_add = -1;
6250 else
6251 return;
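/* For instance, A > 0xffff (TO_ADD == 1) can become A >= 0x10000 when
   the constant 0x10000 is cheaper to load into a register than 0xffff.  */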
6253 /* Check for overflow/underflow in the case of signed values and
6254 wrapping around in the case of unsigned values. If any occur
6255 cancel the optimization. */
6256 wi::overflow_type overflow = wi::OVF_NONE;
6257 wide_int imm_modif;
6259 if (to_add == 1)
6260 imm_modif = wi::add (imm_val, 1, sgn, &overflow);
6261 else
6262 imm_modif = wi::sub (imm_val, 1, sgn, &overflow);
6264 if (overflow)
6265 return;
6267 /* The following creates a pseudo; if we cannot do that, bail out. */
6268 if (!can_create_pseudo_p ())
6269 return;
6271 rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
6272 rtx new_imm = immed_wide_int_const (imm_modif, mode);
6274 rtx_insn *old_rtx = gen_move_insn (reg, *imm);
6275 rtx_insn *new_rtx = gen_move_insn (reg, new_imm);
6277 /* Update the immediate and the code. */
6278 if (insn_cost (old_rtx, true) > insn_cost (new_rtx, true))
6280 *code = equivalent_cmp_code (*code);
6281 *imm = new_imm;
6287 /* Perform possibly multi-word comparison and conditional jump to LABEL
6288 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
6289 now a thin wrapper around do_compare_rtx_and_jump. */
6291 static void
6292 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6293 rtx_code_label *label)
6295 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6296 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6297 NULL, label, profile_probability::uninitialized ());