gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2018 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "backend.h"
  26 #include "target.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "predict.h"
  30 #include "memmodel.h"
  31 #include "tm_p.h"
  32 #include "expmed.h"
  33 #include "optabs.h"
  34 #include "regs.h"
  35 #include "emit-rtl.h"
  36 #include "diagnostic-core.h"
  37 #include "fold-const.h"
  38 #include "stor-layout.h"
  39 #include "dojump.h"
  40 #include "explow.h"
  41 #include "expr.h"
  42 #include "langhooks.h"
  43 #include "tree-vector-builder.h"
  44
     /* The default struct target_expmed instance.  When SWITCHABLE_TARGET is
        nonzero, this_target_expmed starts out pointing at it.  */
  45 struct target_expmed default_target_expmed;
  46 #if SWITCHABLE_TARGET
  47 struct target_expmed *this_target_expmed = &default_target_expmed;
  48 #endif
  49
     /* Forward declarations of file-local (static) helper functions.  */
  50 static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
  51                                       unsigned HOST_WIDE_INT,
  52                                       unsigned HOST_WIDE_INT,
  53                                       poly_uint64, poly_uint64,
  54                                       machine_mode, rtx, bool, bool);
  55 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
  56                                    unsigned HOST_WIDE_INT,
  57                                    unsigned HOST_WIDE_INT,
  58                                    poly_uint64, poly_uint64,
  59                                    rtx, scalar_int_mode, bool);
  60 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
  61                                      unsigned HOST_WIDE_INT,
  62                                      unsigned HOST_WIDE_INT,
  63                                      rtx, scalar_int_mode, bool);
  64 static void store_split_bit_field (rtx, opt_scalar_int_mode,
  65                                    unsigned HOST_WIDE_INT,
  66                                    unsigned HOST_WIDE_INT,
  67                                    poly_uint64, poly_uint64,
  68                                    rtx, scalar_int_mode, bool);
  69 static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
  70                                        unsigned HOST_WIDE_INT,
  71                                        unsigned HOST_WIDE_INT, int, rtx,
  72                                        machine_mode, machine_mode, bool, bool);
  73 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
  74                                     unsigned HOST_WIDE_INT,
  75                                     unsigned HOST_WIDE_INT, rtx, int, bool);
  76 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
  77                                       unsigned HOST_WIDE_INT,
  78                                       unsigned HOST_WIDE_INT, rtx, int, bool);
  79 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
  80 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
  81                                     unsigned HOST_WIDE_INT,
  82                                     unsigned HOST_WIDE_INT, int, bool);
  83 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
  84 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
  85 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
  86
  87 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  88    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  89    The mask is truncated if necessary to the width of mode MODE.  The
  90    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
  91
  92 static inline rtx
  93 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
  94 {
       /* Build the mask at MODE's precision, then wrap it in a constant rtx
          of mode MODE.  */
  95   return immed_wide_int_const
  96     (wi::shifted_mask (bitpos, bitsize, complement,
  97                        GET_MODE_PRECISION (mode)), mode);
  98 }
  99
 100 /* Test whether a value is zero or a power of two.  The argument X is
        expanded twice, so it should not have side effects.  */
 101 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
 102   (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
 103
     /* Scratch rtxes shared by the init_expmed* routines.  init_expmed builds
        each expression once; init_expmed_one_mode and init_expmed_one_conv
        then change their modes (and some operands) in place and ask
        set_src_cost what the expression costs.  The shapes noted below are
        the ones init_expmed creates.  */
 104 struct init_expmed_rtl
 105 {
 106   rtx reg;          /* Raw REG used as the operand of the others.  */
 107   rtx plus;         /* (plus reg reg)  */
 108   rtx neg;          /* (neg reg)  */
 109   rtx mult;         /* (mult reg reg)  */
 110   rtx sdiv;         /* (div reg reg)  */
 111   rtx udiv;         /* (udiv reg reg)  */
 112   rtx sdiv_32;      /* (div reg pow2[5]), i.e. signed division by 32.  */
 113   rtx smod_32;      /* (mod reg pow2[5]), i.e. signed modulus by 32.  */
 114   rtx wide_mult;    /* (mult zext zext)  */
 115   rtx wide_lshr;    /* (lshiftrt wide_mult <count>)  */
 116   rtx wide_trunc;   /* (truncate wide_lshr)  */
 117   rtx shift;        /* (ashift reg <count>)  */
 118   rtx shift_mult;   /* (mult reg <power of two>)  */
 119   rtx shift_add;    /* (plus shift_mult reg)  */
 120   rtx shift_sub0;   /* (minus shift_mult reg)  */
 121   rtx shift_sub1;   /* (minus reg shift_mult)  */
 122   rtx zext;         /* (zero_extend reg)  */
 123   rtx trunc;        /* (truncate reg)  */
 124
       /* pow2[M] is the constant 1 << M and cint[M] is the constant M, for
          M from 1 to MAX_BITS_PER_WORD - 1.  init_expmed leaves entry 0 of
          both arrays null.  */
 125   rtx pow2[MAX_BITS_PER_WORD];
 126   rtx cint[MAX_BITS_PER_WORD];
 127 };
 128
     /* Record, through set_convert_cost, the cost of converting a value of
        mode FROM_MODE to mode TO_MODE for the SPEED setting being filled in.
        ALL holds the scratch rtxes; ALL->reg is switched to FROM_MODE here.
        The cost recorded is that of ALL->trunc when TO_MODE's (adjusted)
        precision is smaller than FROM_MODE's and that of ALL->zext
        otherwise, evaluated in TO_MODE.  The caller in this file,
        init_expmed_one_mode, has already set the mode of both of those
        expressions to TO_MODE.  */
 129 static void
 130 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
 131                       scalar_int_mode from_mode, bool speed)
 132 {
 133   int to_size, from_size;
 134   rtx which;
 135
 136   to_size = GET_MODE_PRECISION (to_mode);
 137   from_size = GET_MODE_PRECISION (from_mode);
 138
 139   /* Most partial integers have a precision less than the "full"
 140      integer it requires for storage.  In case one doesn't, for
 141      comparison purposes here, reduce the bit size by one in that
 142      case.  */
 143   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 144       && pow2p_hwi (to_size))
 145     to_size --;
 146   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 147       && pow2p_hwi (from_size))
 148     from_size --;
 149
 150   /* Assume cost of zero-extend and sign-extend is the same.  */
 151   which = (to_size < from_size ? all->trunc : all->zext);
 152
       /* WHICH has ALL->reg as its operand, so giving the register
          FROM_MODE makes WHICH a FROM_MODE -> TO_MODE conversion.  */
 153   PUT_MODE (all->reg, from_mode);
 154   set_convert_cost (to_mode, from_mode, speed,
 155                     set_src_cost (which, to_mode, speed));
 156 }
 157
     /* Fill in the cost-table entries for mode MODE and the SPEED setting
        being initialized, using the scratch rtxes in ALL.  The scratch
        expressions are modified in place: their modes are changed to MODE
        and the variable operands of the shift expressions are replaced for
        each shift count.  */
 158 static void
 159 init_expmed_one_mode (struct init_expmed_rtl *all,
 160                       machine_mode mode, int speed)
 161 {
 162   int m, n, mode_bitsize;
 163   machine_mode mode_from;
 164
 165   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 166
       /* Retarget the scratch expressions to MODE.  wide_mult and wide_lshr
          are not touched here; they are only given a mode in the wider-mode
          branch at the end of this function.  */
 167   PUT_MODE (all->reg, mode);
 168   PUT_MODE (all->plus, mode);
 169   PUT_MODE (all->neg, mode);
 170   PUT_MODE (all->mult, mode);
 171   PUT_MODE (all->sdiv, mode);
 172   PUT_MODE (all->udiv, mode);
 173   PUT_MODE (all->sdiv_32, mode);
 174   PUT_MODE (all->smod_32, mode);
 175   PUT_MODE (all->wide_trunc, mode);
 176   PUT_MODE (all->shift, mode);
 177   PUT_MODE (all->shift_mult, mode);
 178   PUT_MODE (all->shift_add, mode);
 179   PUT_MODE (all->shift_sub0, mode);
 180   PUT_MODE (all->shift_sub1, mode);
 181   PUT_MODE (all->zext, mode);
 182   PUT_MODE (all->trunc, mode);
 183
       /* Record the costs of the basic arithmetic operations.  */
 184   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
 185   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
 186   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
 187   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
 188   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
 189
       /* Signed division and modulus by 32 are the samples used for the
          "pow2 cheap" flags: they count as cheap when they cost no more
          than two (respectively four) additions.  */
 190   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
 191                                      <= 2 * add_cost (speed, mode)));
 192   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
 193                                      <= 4 * add_cost (speed, mode)));
 194
       /* For a shift count of zero, the shift itself is recorded as free and
          the shift-and-add/subtract forms as costing one addition.  */
 195   set_shift_cost (speed, mode, 0, 0);
 196   {
 197     int cost = add_cost (speed, mode);
 198     set_shiftadd_cost (speed, mode, 0, cost);
 199     set_shiftsub0_cost (speed, mode, 0, cost);
 200     set_shiftsub1_cost (speed, mode, 0, cost);
 201   }
 202
       /* pow2[] and cint[] only have MAX_BITS_PER_WORD entries, so cap the
          shift counts that are costed.  */
 203   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 204   for (m = 1; m < n; m++)
 205     {
           /* shift_add, shift_sub0 and shift_sub1 all contain shift_mult, so
              changing its multiplier changes them as well.  */
 206       XEXP (all->shift, 1) = all->cint[m];
 207       XEXP (all->shift_mult, 1) = all->pow2[m];
 208
 209       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
 210       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
 211                                                        speed));
 212       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
 213                                                         speed));
 214       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
 215                                                         speed));
 216     }
 217
       /* Conversion and widening-multiply costs are only recorded when MODE
          is a scalar integer mode.  */
 218   scalar_int_mode int_mode_to;
 219   if (is_a <scalar_int_mode> (mode, &int_mode_to))
 220     {
           /* Cost a conversion to MODE from every mode in the MODE_INT
              range.  */
 221       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 222            mode_from = (machine_mode)(mode_from + 1))
 223         init_expmed_one_conv (all, int_mode_to,
 224                               as_a <scalar_int_mode> (mode_from), speed);
 225
           /* For a MODE_INT mode that has a wider mode, also cost a widening
              multiply and the high-part multiply built from it.  */
 226       scalar_int_mode wider_mode;
 227       if (GET_MODE_CLASS (int_mode_to) == MODE_INT
 228           && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
 229         {
               /* zext, wide_mult and wide_lshr operate in WIDER_MODE;
                  wide_trunc, which is already in MODE, truncates the
                  product shifted right by MODE's bit size.  */
 230           PUT_MODE (all->zext, wider_mode);
 231           PUT_MODE (all->wide_mult, wider_mode);
 232           PUT_MODE (all->wide_lshr, wider_mode);
 233           XEXP (all->wide_lshr, 1)
 234             = gen_int_shift_amount (wider_mode, mode_bitsize);
 235
 236           set_mul_widen_cost (speed, wider_mode,
 237                               set_src_cost (all->wide_mult, wider_mode, speed));
 238           set_mul_highpart_cost (speed, int_mode_to,
 239                                  set_src_cost (all->wide_trunc,
 240                                                int_mode_to, speed));
 241         }
 242     }
 243 }
 244
     /* Initialize the expmed cost tables.  Build one set of scratch rtxes,
        use it to cost every mode in the MODE_INT range and, where the
        corresponding MIN_MODE_* value is not VOIDmode, in the partial-int
        and vector-int ranges, once for each of the two SPEED settings
        (0 and 1).  Afterwards clear the algorithm hash table (or mark it as
        in use if this is the first call) and free the scratch rtxes.  */
 245 void
 246 init_expmed (void)
 247 {
 248   struct init_expmed_rtl all;
 249   machine_mode mode = QImode;
 250   int m, speed;
 251
       /* Entry 0 of pow2[] and cint[] stays null after the memset; the loop
          fills in entries 1 and up.  */
 252   memset (&all, 0, sizeof all);
 253   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 254     {
 255       all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
 256       all.cint[m] = GEN_INT (m);
 257     }
 258
       /* Build the scratch expressions in QImode; init_expmed_one_mode
          changes their modes, and the placeholder second operands of the
          shift expressions, as each mode is costed.  */
 259   /* Avoid using hard regs in ways which may be unsupported.  */
 260   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 261   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 262   all.neg = gen_rtx_NEG (mode, all.reg);
 263   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 264   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 265   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 266   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 267   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 268   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 269   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 270   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 271   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 272   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 273   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 274   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 275   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 276   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 277   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 278
 279   for (speed = 0; speed < 2; speed++)
 280     {
 281       crtl->maybe_hot_insn_p = speed;
           /* NOTE(review): MODE is QImode only on the first iteration; on
              the second it holds whatever value the mode loops below left
              in it.  Confirm that this is intended.  */
 282       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
 283
 284       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 285            mode = (machine_mode)(mode + 1))
 286         init_expmed_one_mode (&all, mode, speed);
 287
 288       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 289         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 290              mode = (machine_mode)(mode + 1))
 291           init_expmed_one_mode (&all, mode, speed);
 292
 293       if (MIN_MODE_VECTOR_INT != VOIDmode)
 294         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 295              mode = (machine_mode)(mode + 1))
 296           init_expmed_one_mode (&all, mode, speed);
 297     }
 298
       /* Zero all the algorithm hash entries if the hash has been in use;
          otherwise just mark it as in use from now on.  */
 299   if (alg_hash_used_p ())
 300     {
 301       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 302       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 303     }
 304   else
 305     set_alg_hash_used_p (true);
       /* NOTE(review): presumably this undoes the crtl->maybe_hot_insn_p
          changes made in the loop above -- confirm.  */
 306   default_rtl_profile ();
 307
       /* Free the scratch rtxes in the reverse order of their creation.
          The pow2[] and cint[] constants are not freed here.  */
 308   ggc_free (all.trunc);
 309   ggc_free (all.shift_sub1);
 310   ggc_free (all.shift_sub0);
 311   ggc_free (all.shift_add);
 312   ggc_free (all.shift_mult);
 313   ggc_free (all.shift);
 314   ggc_free (all.wide_trunc);
 315   ggc_free (all.wide_lshr);
 316   ggc_free (all.wide_mult);
 317   ggc_free (all.zext);
 318   ggc_free (all.smod_32);
 319   ggc_free (all.sdiv_32);
 320   ggc_free (all.udiv);
 321   ggc_free (all.sdiv);
 322   ggc_free (all.mult);
 323   ggc_free (all.neg);
 324   ggc_free (all.plus);
 325   ggc_free (all.reg);
 326 }
 327
 328 /* Return an rtx representing minus the value of X.
 329    MODE is the intended mode of the result,
 330    useful if X is a CONST_INT.  */
 331
 332 rtx
 333 negate_rtx (machine_mode mode, rtx x)
 334 {
       /* Try to simplify the negation first.  */
 335   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 336
       /* If no simplification was found, expand a neg_optab operation
          instead.  */
 337   if (result == 0)
 338     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 339
 340   return result;
 341 }
 342
 343 /* Whether reverse storage order is supported on the target.  The value
        is -1 until check_reverse_storage_order_support has run, which sets
        it to 0 (unsupported) or 1 (supported).  */
 344 static int reverse_storage_order_supported = -1;
 345
 346 /* Check whether reverse storage order is supported on the target and
        record the answer in reverse_storage_order_supported.  It is treated
        as unsupported, and a "sorry" diagnostic is issued, when
        BYTES_BIG_ENDIAN and WORDS_BIG_ENDIAN differ.  */
 347
 348 static void
 349 check_reverse_storage_order_support (void)
 350 {
 351   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 352     {
 353       reverse_storage_order_supported = 0;
 354       sorry ("reverse scalar storage order");
 355     }
 356   else
 357     reverse_storage_order_supported = 1;
 358 }
 359
 360 /* Whether reverse FP storage order is supported on the target.  The
        value is -1 until check_reverse_float_storage_order_support has run,
        which sets it to 0 (unsupported) or 1 (supported).  */
 361 static int reverse_float_storage_order_supported = -1;
 362
 363 /* Check whether reverse FP storage order is supported on the target and
        record the answer in reverse_float_storage_order_supported.  It is
        treated as unsupported, and a "sorry" diagnostic is issued, when
        FLOAT_WORDS_BIG_ENDIAN and WORDS_BIG_ENDIAN differ.  */
 364
 365 static void
 366 check_reverse_float_storage_order_support (void)
 367 {
 368   if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 369     {
 370       reverse_float_storage_order_supported = 0;
 371       sorry ("reverse floating-point scalar storage order");
 372     }
 373   else
 374     reverse_float_storage_order_supported = 1;
 375 }
 376
 377 /* Return an rtx representing value of X with reverse storage order.
 378    MODE is the intended mode of the result,
 379    useful if X is a CONST_INT.  */
 380
 381 rtx
 382 flip_storage_order (machine_mode mode, rtx x)
 383 {
 384   scalar_int_mode int_mode;
 385   rtx result;
 386
       /* A QImode value is returned unchanged.  */
 387   if (mode == QImode)
 388     return x;
 389
       /* For a complex mode, flip the real and imaginary parts separately
          in the inner mode and recombine them.  */
 390   if (COMPLEX_MODE_P (mode))
 391     {
 392       rtx real = read_complex_part (x, false);
 393       rtx imag = read_complex_part (x, true);
 394
 395       real = flip_storage_order (GET_MODE_INNER (mode), real);
 396       imag = flip_storage_order (GET_MODE_INNER (mode), imag);
 397
 398       return gen_rtx_CONCAT (mode, real, imag);
 399     }
 400
       /* Run the target support check the first time we get here (the flag
          is still negative).  Only the diagnostic it may issue matters
          here; the flag's value is not otherwise tested in this
          function.  */
 401   if (__builtin_expect (reverse_storage_order_supported < 0, 0))
 402     check_reverse_storage_order_support ();
 403
       /* If MODE is not a scalar integer mode, view X in the integer mode
          with the same precision so that it can be byte-swapped.  */
 404   if (!is_a <scalar_int_mode> (mode, &int_mode))
 405     {
 406       if (FLOAT_MODE_P (mode)
 407           && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
 408         check_reverse_float_storage_order_support ();
 409
           /* Without such an integer mode, diagnose and return X as is.  */
 410       if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode))
 411         {
 412           sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
 413           return x;
 414         }
 415       x = gen_lowpart (int_mode, x);
 416     }
 417
       /* Byte-swap: try to simplify first, otherwise expand a bswap_optab
          operation.  */
 418   result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
 419   if (result == 0)
 420     result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
 421
       /* Convert back to MODE if the swap was done in a different mode.  */
 422   if (int_mode != mode)
 423     result = gen_lowpart (mode, result);
 424
 425   return result;
 426 }
 427
 428 /* If MODE is set, adjust bitfield memory MEM so that it points to the
 429    first unit of mode MODE that contains a bitfield of size BITSIZE at
 430    bit position BITNUM.  If MODE is not set, return a BLKmode reference
 431    to every byte in the bitfield.  Set *NEW_BITNUM to the bit position
 432    of the field within the new memory.  */
 433
 434 static rtx
 435 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
 436                       unsigned HOST_WIDE_INT bitsize,
 437                       unsigned HOST_WIDE_INT bitnum,
 438                       unsigned HOST_WIDE_INT *new_bitnum)
 439 {
 440   scalar_int_mode imode;
 441   if (mode.exists (&imode))
 442     {
           /* Round BITNUM down to a multiple of the mode's bit size to get
              the byte offset of the containing unit; the remainder is the
              field's position within that unit.  */
 443       unsigned int unit = GET_MODE_BITSIZE (imode);
 444       *new_bitnum = bitnum % unit;
 445       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 446       return adjust_bitfield_address (mem, imode, offset);
 447     }
 448   else
 449     {
           /* Start at the byte containing the first bit of the field and
              cover every byte up to the one containing its last bit.  */
 450       *new_bitnum = bitnum % BITS_PER_UNIT;
 451       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 452       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 453                             / BITS_PER_UNIT);
 454       return adjust_bitfield_address_size (mem, BLKmode, offset, size);
 455     }
 456 }
 457
 458 /* The caller wants to perform insertion or extraction PATTERN on a
 459    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 460    BITREGION_START and BITREGION_END are as for store_bit_field
 461    and FIELDMODE is the natural mode of the field.
 462
 463    Search for a mode that is compatible with the memory access
 464    restrictions and (where applicable) with a register insertion or
 465    extraction.  Return the new memory on success, storing the adjusted
 466    bit position in *NEW_BITNUM.  Return null otherwise.  */
 467
 468 static rtx
 469 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 470                               rtx op0, HOST_WIDE_INT bitsize,
 471                               HOST_WIDE_INT bitnum,
 472                               poly_uint64 bitregion_start,
 473                               poly_uint64 bitregion_end,
 474                               machine_mode fieldmode,
 475                               unsigned HOST_WIDE_INT *new_bitnum)
 476 {
       /* ITER enumerates the candidate access modes for this field, given
          OP0's alignment and volatility.  If it yields none, return
          null.  */
 477   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 478                                 bitregion_end, MEM_ALIGN (op0),
 479                                 MEM_VOLATILE_P (op0));
 480   scalar_int_mode best_mode;
 481   if (iter.next_mode (&best_mode))
 482     {
 483       /* We can use a memory in BEST_MODE.  See whether this is true for
 484          any wider modes.  All other things being equal, we prefer to
 485          use the widest mode possible because it tends to expose more
 486          CSE opportunities.  */
 487       if (!iter.prefer_smaller_modes ())
 488         {
 489           /* Limit the search to the mode required by the corresponding
 490              register insertion or extraction instruction, if any.  */
               /* word_mode is the limit when no such instruction is
                  found.  */
 491           scalar_int_mode limit_mode = word_mode;
 492           extraction_insn insn;
 493           if (get_best_reg_extraction_insn (&insn, pattern,
 494                                             GET_MODE_BITSIZE (best_mode),
 495                                             fieldmode))
 496             limit_mode = insn.field_mode;
 497
               /* Keep taking modes from ITER for as long as they are no
                  larger than LIMIT_MODE.  */
 498           scalar_int_mode wider_mode;
 499           while (iter.next_mode (&wider_mode)
 500                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 501             best_mode = wider_mode;
 502         }
 503       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 504                                    new_bitnum);
 505     }
 506   return NULL_RTX;
 507 }
 508
 509 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 510    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 511    offset is then BITNUM / BITS_PER_UNIT.  */
 512
 513 static bool
 514 lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
 515                      machine_mode struct_mode)
 516 {
 517   poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
       /* When BYTES_BIG_ENDIAN, the field must start on a byte boundary and
          end either at the end of STRUCT_MODE or on a multiple of the
          natural register size.  */
 518   if (BYTES_BIG_ENDIAN)
 519     return (multiple_p (bitnum, BITS_PER_UNIT)
 520             && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
 521                 || multiple_p (bitnum + bitsize,
 522                                regsize * BITS_PER_UNIT)));
       /* Otherwise the field must start on a multiple of the natural
          register size; BITSIZE is not examined in this case.  */
 523   else
 524     return multiple_p (bitnum, regsize * BITS_PER_UNIT);
 525 }
 526
 527 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 528    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 529    Return false if the access would touch memory outside the range
 530    BITREGION_START to BITREGION_END for conformance to the C++ memory
 531    model.  When BITREGION_END is known to be zero, that range check is
        skipped.  */
 532
 533 static bool
 534 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 535                             unsigned HOST_WIDE_INT bitnum,
 536                             scalar_int_mode fieldmode,
 537                             poly_uint64 bitregion_start,
 538                             poly_uint64 bitregion_end)
 539 {
       /* Size in bits of one FIELDMODE access.  */
 540   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 541
 542   /* -fstrict-volatile-bitfields must be enabled and we must have a
 543      volatile MEM.  */
 544   if (!MEM_P (op0)
 545       || !MEM_VOLATILE_P (op0)
 546       || flag_strict_volatile_bitfields <= 0)
 547     return false;
 548
 549   /* The bit size must not be larger than the field mode, and
 550      the field mode must not be larger than a word.  */
 551   if (bitsize > modesize || modesize > BITS_PER_WORD)
 552     return false;
 553
 554   /* Check for cases of unaligned fields that must be split.  */
       /* That is, the field must not cross a MODESIZE-bit boundary.  */
 555   if (bitnum % modesize + bitsize > modesize)
 556     return false;
 557
 558   /* The memory must be sufficiently aligned for a MODESIZE access.
 559      This condition guarantees, that the memory access will not
 560      touch anything after the end of the structure.  */
 561   if (MEM_ALIGN (op0) < modesize)
 562     return false;
 563
 564   /* Check for cases where the C++ memory model applies.  */
       /* The MODESIZE-bit unit containing the field must lie within
          [BITREGION_START, BITREGION_END].  */
 565   if (maybe_ne (bitregion_end, 0U)
 566       && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
 567           || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
 568                        bitregion_end)))
 569     return false;
 570
 571   return true;
 572 }
 573
 574 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 575    bit number BITNUM can be treated as a simple value of mode MODE.
 576    Store the byte offset in *BYTENUM if so.  ("OP" here refers to the
        OP0 parameter.)  */
 577
 578 static bool
 579 simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
 580                        machine_mode mode, poly_uint64 *bytenum)
 581 {
 582   return (MEM_P (op0)
               /* The field must start on a byte boundary...  */
 583           && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
               /* ...be exactly as wide as MODE...  */
 584           && known_eq (bitsize, GET_MODE_BITSIZE (mode))
               /* ...and either unaligned accesses in MODE are not slow on
                  the target, or both the field position and the memory are
                  aligned for MODE.  */
 585           && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
 586               || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
 587                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 588 }
 589 \f
 590 /* Try to use instruction INSV to store VALUE into a field of OP0.
 591    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
 592    BLKmode MEM.  VALUE_MODE is the mode of VALUE.  BITSIZE and BITNUM
 593    are as for store_bit_field.
        Return true if the insertion was expanded.  Otherwise delete any
        insns emitted since entry and return false.  */
 594
 595 static bool
 596 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 597                             opt_scalar_int_mode op0_mode,
 598                             unsigned HOST_WIDE_INT bitsize,
 599                             unsigned HOST_WIDE_INT bitnum,
 600                             rtx value, scalar_int_mode value_mode)
 601 {
 602   struct expand_operand ops[4];
 603   rtx value1;
 604   rtx xop0 = op0;
       /* Remember the current insn so that a failed attempt can be undone.  */
 605   rtx_insn *last = get_last_insn ();
 606   bool copy_back = false;
 607
       /* The field must be non-empty and no wider than the mode the
          instruction operates on.  */
 608   scalar_int_mode op_mode = insv->field_mode;
 609   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 610   if (bitsize == 0 || bitsize > unit)
 611     return false;
 612
 613   if (MEM_P (xop0))
 614     /* Get a reference to the first byte of the field.  */
 615     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 616                                  &bitnum);
 617   else
 618     {
 619       /* Convert from counting within OP0 to counting in OP_MODE.  */
 620       if (BYTES_BIG_ENDIAN)
 621         bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
 622
 623       /* If xop0 is a register, we need it in OP_MODE
 624          to make it acceptable to the format of insv.  */
 625       if (GET_CODE (xop0) == SUBREG)
 626         /* We can't just change the mode, because this might clobber op0,
 627            and we will need the original value of op0 if insv fails.  */
 628         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 629       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 630         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 631     }
 632
 633   /* If the destination is a paradoxical subreg such that we need a
 634      truncate to the inner mode, perform the insertion on a temporary and
 635      truncate the result to the original destination.  Note that we can't
 636      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 637      X) 0)) is (reg:N X).  */
 638   if (GET_CODE (xop0) == SUBREG
 639       && REG_P (SUBREG_REG (xop0))
 640       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 641                                          op_mode))
 642     {
 643       rtx tem = gen_reg_rtx (op_mode);
 644       emit_move_insn (tem, xop0);
 645       xop0 = tem;
 646       copy_back = true;
 647     }
 648
 649   /* There is a similar overflow check at the start of store_bit_field_1,
 650      but that only checks the situation where the field lies completely
 651      outside the register.  The field can also lie partially in the
 652      register, in which case we need to adjust BITSIZE for this partial
 653      overflow.  Without this fix, pr48335-2.c on big-endian would be
 654      broken on architectures that support bit-insert instructions, such
 655      as arm and aarch64.  */
 656   if (bitsize + bitnum > unit && bitnum < unit)
 657     {
 658       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
 659                "destination object, data truncated into %wu-bit",
 660                bitsize, unit - bitnum);
 661       bitsize = unit - bitnum;
 662     }
 663
 664   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 665      "backwards" from the size of the unit we are inserting into.
 666      Otherwise, we count bits from the most significant on a
 667      BYTES/BITS_BIG_ENDIAN machine.  */
 668
 669   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 670     bitnum = unit - bitsize - bitnum;
 671
 672   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 673   value1 = value;
 674   if (value_mode != op_mode)
 675     {
 676       if (GET_MODE_BITSIZE (value_mode) >= bitsize)
 677         {
 678           rtx tmp;
 679           /* Optimization: Don't bother really extending VALUE
 680              if it has all the bits we will actually use.  However,
 681              if we must narrow it, be sure we do it correctly.  */
 682
               /* VALUE_MODE narrower than OP_MODE: take a subreg of VALUE
                  in OP_MODE, forcing VALUE into a register if it cannot be
                  simplified directly.  */
 683           if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
 684             {
 685               tmp = simplify_subreg (op_mode, value1, value_mode, 0);
 686               if (! tmp)
 687                 tmp = simplify_gen_subreg (op_mode,
 688                                            force_reg (value_mode, value1),
 689                                            value_mode, 0);
 690             }
               /* Otherwise take the low part of VALUE in OP_MODE, again
                  forcing it into a register if necessary.  */
 691           else
 692             {
 693               tmp = gen_lowpart_if_possible (op_mode, value1);
 694               if (! tmp)
 695                 tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
 696             }
 697           value1 = tmp;
 698         }
 699       else if (CONST_INT_P (value))
 700         value1 = gen_int_mode (INTVAL (value), op_mode);
 701       else
 702         /* Parse phase is supposed to make VALUE's data type
 703            match that of the component reference, which is a type
 704            at least as wide as the field; so VALUE should have
 705            a mode that corresponds to that type.  */
 706         gcc_assert (CONSTANT_P (value));
 707     }
 708
       /* Operands passed to the insv pattern: destination, field size,
          field position and the value to insert.  */
 709   create_fixed_operand (&ops[0], xop0);
 710   create_integer_operand (&ops[1], bitsize);
 711   create_integer_operand (&ops[2], bitnum);
 712   create_input_operand (&ops[3], value1, op_mode);
 713   if (maybe_expand_insn (insv->icode, 4, ops))
 714     {
           /* The insertion was done on a temporary; copy the result back to
              the original destination.  */
 715       if (copy_back)
 716         convert_move (op0, xop0, true);
 717       return true;
 718     }
       /* The pattern could not be expanded; discard whatever was emitted.  */
 719   delete_insns_since (last);
 720   return false;
 721 }
 722
 723 /* A subroutine of store_bit_field, with the same arguments.  Return true
 724    if the operation could be implemented.
 725
 726    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 727    no other way of implementing the operation.  If FALLBACK_P is false,
 728    return false instead.  */
 729
 730 static bool
 731 store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
 732                    poly_uint64 bitregion_start, poly_uint64 bitregion_end,
 733                    machine_mode fieldmode,
 734                    rtx value, bool reverse, bool fallback_p)
 735 {
       /* Overview: the strategies below are tried in order -- a vec_set
          pattern, a plain SUBREG move for non-MEM destinations, a simple
          memory store, and finally the integer-mode handling in
          store_integral_bit_field.  Every early exit in this function
          returns true; a false result can only come back from
          store_integral_bit_field.  */
 736   rtx op0 = str_rtx;
 737
       /* Look through any SUBREGs around the destination, adding each one's
          memory offset (scaled by BITS_PER_UNIT) to BITNUM so that BITNUM
          is counted from the start of the innermost object.  */
 738   while (GET_CODE (op0) == SUBREG)
 739     {
 740       bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
 741       op0 = SUBREG_REG (op0);
 742     }
 743
 744   /* No action is needed if the target is a register and if the field
 745      lies completely outside that register.  This can occur if the source
 746      code contains an out-of-bounds access to a small array.  */
 747   if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
 748     return true;
 749
 750   /* Use vec_set patterns for inserting parts of vectors whenever
 751      available.  */
 752   machine_mode outermode = GET_MODE (op0);
 753   scalar_mode innermode = GET_MODE_INNER (outermode);
 754   poly_uint64 pos;
       /* The field must have the element mode, be exactly one element wide
          and start on an element boundary.  multiple_p is handed &POS,
          which is then used as the element index operand.  */
 755   if (VECTOR_MODE_P (outermode)
 756       && !MEM_P (op0)
 757       && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
 758       && fieldmode == innermode
 759       && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
 760       && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
 761     {
 762       struct expand_operand ops[3];
 763       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 764
           /* Operands: the vector destination, the new element value and
              the element index.  */
 765       create_fixed_operand (&ops[0], op0);
 766       create_input_operand (&ops[1], value, innermode);
 767       create_integer_operand (&ops[2], pos);
           /* If the expansion fails, fall through to the generic
              strategies below.  */
 768       if (maybe_expand_insn (icode, 3, ops))
 769         return true;
 770     }
 771
 772   /* If the target is a register, overwriting the entire object, or storing
 773      a full-word or multi-word field can be done with just a SUBREG.  */
 774   if (!MEM_P (op0)
 775       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
 776     {
 777       /* Use the subreg machinery either to narrow OP0 to the required
 778          words or to cope with mode punning between equal-sized modes.
 779          In the latter case, use subreg on the rhs side, not lhs.  */
 780       rtx sub;
 781       HOST_WIDE_INT regnum;
 782       poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
 783       if (known_eq (bitnum, 0U)
 784           && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
 785         {
               /* Whole-object overwrite: view VALUE in OP0's mode.  If no
                  such subreg can be formed, fall through.  */
 786           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 787           if (sub)
 788             {
 789               if (reverse)
 790                 sub = flip_storage_order (GET_MODE (op0), sub);
 791               emit_move_insn (op0, sub);
 792               return true;
 793             }
 794         }
           /* Otherwise, if the field starts at a constant multiple of the
              natural register size and its size is a multiple of that size,
              store VALUE into the FIELDMODE subreg of OP0 at byte offset
              REGNUM * REGSIZE.  */
 795       else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum)
 796                && multiple_p (bitsize, regsize * BITS_PER_UNIT))
 797         {
 798           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 799                                      regnum * regsize);
 800           if (sub)
 801             {
 802               if (reverse)
 803                 value = flip_storage_order (fieldmode, value);
 804               emit_move_insn (sub, value);
 805               return true;
 806             }
 807         }
 808     }
 809
 810   /* If the target is memory, storing any naturally aligned field can be
 811      done with a simple store.  For targets that support fast unaligned
 812      memory, any naturally sized, unit aligned field can be done directly.  */
 813   poly_uint64 bytenum;
       /* BYTENUM is filled in by simple_mem_bitfield_p and used as the
          offset at which OP0 is re-addressed in FIELDMODE.  */
 814   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
 815     {
 816       op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
 817       if (reverse)
 818         value = flip_storage_order (fieldmode, value);
 819       emit_move_insn (op0, value);
 820       return true;
 821     }
 822
 823   /* It's possible we'll need to handle other cases here for
 824      polynomial bitnum and bitsize.  */
 825
 826   /* From here on we need to be looking at a fixed-size insertion.  */
 827   unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
 828   unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
 829
 830   /* Make sure we are playing with integral modes.  Pun with subregs
 831      if we aren't.  This must come after the entire register case above,
 832      since that case is valid for any mode.  The following cases are only
 833      valid for integral modes.  */
 834   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
 835   scalar_int_mode imode;
       /* Re-view OP0 unless it is already in the integer mode of its size.
          A MEM is re-addressed in that integer mode (or in BLKmode when no
          such mode exists) at offset 0, keeping its MEM_SIZE.  Anything
          else must have a corresponding integer mode and is accessed
          through its lowpart in that mode.  */
 836   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
 837     {
 838       if (MEM_P (op0))
 839         op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
 840                                             0, MEM_SIZE (op0));
 841       else
 842         op0 = gen_lowpart (op0_mode.require (), op0);
 843     }
 844
       /* Hand over to the integer-mode worker with the now constant size
          and position; its result is returned unchanged.  */
 845   return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
 846                                    bitregion_start, bitregion_end,
 847                                    fieldmode, value, reverse, fallback_p);
 848 }
 849
 850 /* Subroutine of store_bit_field_1, with the same arguments, except
 851    that BITSIZE and BITNUM are constant.  Handle cases specific to
 852    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
 853    otherwise OP0 is a BLKmode MEM.  */
 854
 855 static bool
 856 store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
 857                           unsigned HOST_WIDE_INT bitsize,
 858                           unsigned HOST_WIDE_INT bitnum,
 859                           poly_uint64 bitregion_start,
 860                           poly_uint64 bitregion_end,
 861                           machine_mode fieldmode,
 862                           rtx value, bool reverse, bool fallback_p)
 863 {
 864   /* Storing an lsb-aligned field in a register
 865      can be done with a movstrict instruction.  */
 866
 867   if (!MEM_P (op0)
 868       && !reverse
 869       && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
 870       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
 871       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 872     {
 873       struct expand_operand ops[2];
 874       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 875       rtx arg0 = op0;
 876       unsigned HOST_WIDE_INT subreg_off;
 877
 878       if (GET_CODE (arg0) == SUBREG)
 879         {
 880           /* Else we've got some float mode source being extracted into
 881              a different float mode destination -- this combination of
 882              subregs results in Severe Tire Damage.  */
 883           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 884                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 885                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 886           arg0 = SUBREG_REG (arg0);
 887         }
 888
           /* Byte offset of the field, used as the SUBREG offset into
              ARG0.  */
 889       subreg_off = bitnum / BITS_PER_UNIT;
 890       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 891         {
 892           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 893
 894           create_fixed_operand (&ops[0], arg0);
 895           /* Shrink the source operand to FIELDMODE.  */
 896           create_convert_operand_to (&ops[1], value, fieldmode, false);
               /* If the expansion fails, fall through to the other
                  strategies below.  */
 897           if (maybe_expand_insn (icode, 2, ops))
 898             return true;
 899         }
 900     }
 901
 902   /* Handle fields bigger than a word.  */
 903
 904   if (bitsize > BITS_PER_WORD)
 905     {
 906       /* Here we transfer the words of the field
 907          in the order least significant first.
 908          This is because the most significant word is the one which may
 909          be less than full.
 910          However, only do that if the value is not BLKmode.  */
 911
 912       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
           /* Number of words needed to cover BITSIZE bits, rounding up.  */
 913       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 914       unsigned int i;
 915       rtx_insn *last;
 916
 917       /* This is the mode we must force value to, so that there will be enough
 918          subwords to extract.  Note that fieldmode will often (always?) be
 919          VOIDmode, because that is what store_field uses to indicate that this
 920          is a bit field, but passing VOIDmode to operand_subword_force
 921          is not allowed.
 922
 923          The mode must be fixed-size, since insertions into variable-sized
 924          objects are meant to be handled before calling this function.  */
 925       fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
 926       if (value_mode == VOIDmode)
 927         value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
 928
           /* Remember the current end of the insn stream so that a partial
              expansion can be discarded if storing any word fails.  */
 929       last = get_last_insn ();
 930       for (i = 0; i < nwords; i++)
 931         {
 932           /* If I is 0, use the low-order word in both field and target;
 933              if I is 1, use the next to lowest word; and so on.  */
 934           unsigned int wordnum = (backwards
 935                                   ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD
 936                                   - i - 1
 937                                   : i);
 938           unsigned int bit_offset = (backwards ^ reverse
 939                                      ? MAX ((int) bitsize - ((int) i + 1)
 940                                             * BITS_PER_WORD,
 941                                             0)
 942                                      : (int) i * BITS_PER_WORD);
 943           rtx value_word = operand_subword_force (value, wordnum, value_mode);
               /* Number of bits stored by this iteration: a full word,
                  or whatever remains of the field if that is less.  */
 944           unsigned HOST_WIDE_INT new_bitsize =
 945             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 946
 947           /* If the remaining chunk doesn't have full wordsize we have
 948              to make sure that for big-endian machines the higher order
 949              bits are used.  */
 950           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 951             {
 952               int shift = BITS_PER_WORD - new_bitsize;
 953               rtx shift_rtx = gen_int_shift_amount (word_mode, shift);
 954               value_word = simplify_expand_binop (word_mode, lshr_optab,
 955                                                   value_word, shift_rtx,
 956                                                   NULL_RTX, true,
 957                                                   OPTAB_LIB_WIDEN);
 958             }
 959
               /* Store this word through a recursive call, using word_mode
                  as the field mode.  On failure, delete everything emitted
                  since LAST and report failure.  */
 960           if (!store_bit_field_1 (op0, new_bitsize,
 961                                   bitnum + bit_offset,
 962                                   bitregion_start, bitregion_end,
 963                                   word_mode,
 964                                   value_word, reverse, fallback_p))
 965             {
 966               delete_insns_since (last);
 967               return false;
 968             }
 969         }
 970       return true;
 971     }
 972
 973   /* If VALUE has a floating-point or complex mode, access it as an
 974      integer of the corresponding size.  This can occur on a machine
 975      with 64 bit registers that uses SFmode for float.  It can also
 976      occur for unaligned float or complex fields.  */
       /* ORIG_VALUE keeps the value as passed in; it is reused by the MEM
          retry further down.  */
 977   rtx orig_value = value;
 978   scalar_int_mode value_mode;
 979   if (GET_MODE (value) == VOIDmode)
 980     /* By this point we've dealt with values that are bigger than a word,
 981        so word_mode is a conservatively correct choice.  */
 982     value_mode = word_mode;
 983   else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
 984     {
           /* Allocate a fresh integer-mode pseudo and copy ORIG_VALUE into
              it through a lowpart view in ORIG_VALUE's own mode.  */
 985       value_mode = int_mode_for_mode (GET_MODE (value)).require ();
 986       value = gen_reg_rtx (value_mode);
 987       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 988     }
 989
 990   /* If OP0 is a multi-word register, narrow it to the affected word.
 991      If the region spans two words, defer to store_split_bit_field.
 992      Don't do this if op0 is a single hard register wider than word
 993      such as a float or vector register.  */
 994   if (!MEM_P (op0)
 995       && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
 996       && (!REG_P (op0)
 997           || !HARD_REGISTER_P (op0)
 998           || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
 999     {
1000       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1001         {
               /* The field crosses a word boundary.  Splitting it counts
                  as a fallback, so report failure if fallbacks are not
                  allowed.  */
1002           if (!fallback_p)
1003             return false;
1004
1005           store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1006                                  bitregion_start, bitregion_end,
1007                                  value, value_mode, reverse);
1008           return true;
1009         }
           /* Replace OP0 by the word that contains the field and make
              BITNUM relative to that word.  */
1010       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1011                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1012       gcc_assert (op0);
1013       op0_mode = word_mode;
1014       bitnum %= BITS_PER_WORD;
1015     }
1016
1017   /* From here on we can assume that the field to be stored in fits
1018      within a word.  If the destination is a register, it too fits
1019      in a word.  */
1020
       /* For a non-MEM destination, try an insv pattern chosen for OP0's
          width.  */
1021   extraction_insn insv;
1022   if (!MEM_P (op0)
1023       && !reverse
1024       && get_best_reg_extraction_insn (&insv, EP_insv,
1025                                        GET_MODE_BITSIZE (op0_mode.require ()),
1026                                        fieldmode)
1027       && store_bit_field_using_insv (&insv, op0, op0_mode,
1028                                      bitsize, bitnum, value, value_mode))
1029     return true;
1030
1031   /* If OP0 is a memory, try copying it to a register and seeing if a
1032      cheap register alternative is available.  */
1033   if (MEM_P (op0) && !reverse)
1034     {
           /* First try an insv pattern selected for a memory operand.  */
1035       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1036                                         fieldmode)
1037           && store_bit_field_using_insv (&insv, op0, op0_mode,
1038                                          bitsize, bitnum, value, value_mode))
1039         return true;
1040
           /* Rollback point for the register attempt below.  */
1041       rtx_insn *last = get_last_insn ();
1042
1043       /* Try loading part of OP0 into a register, inserting the bitfield
1044          into that, and then copying the result back to OP0.  */
1045       unsigned HOST_WIDE_INT bitpos;
1046       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1047                                                bitregion_start, bitregion_end,
1048                                                fieldmode, &bitpos);
1049       if (xop0)
1050         {
               /* The recursive call is given ORIG_VALUE rather than the
                  integer-punned VALUE, and FALLBACK_P is false so that it
                  reports failure instead of using store_fixed_bit_field.
                  REVERSE is known to be false on this path.  If it fails,
                  everything emitted since LAST, including the load into
                  TEMPREG, is deleted.  */
1051           rtx tempreg = copy_to_reg (xop0);
1052           if (store_bit_field_1 (tempreg, bitsize, bitpos,
1053                                  bitregion_start, bitregion_end,
1054                                  fieldmode, orig_value, reverse, false))
1055             {
1056               emit_move_insn (xop0, tempreg);
1057               return true;
1058             }
1059           delete_insns_since (last);
1060         }
1061     }
1062
       /* None of the pattern-based strategies applied.  Either report
          failure or fall back to store_fixed_bit_field, which does not
          return a status.  */
1063   if (!fallback_p)
1064     return false;
1065
1066   store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1067                          bitregion_end, value, value_mode, reverse);
1068   return true;
1069 }
1070
1071 /* Generate code to store value from rtx VALUE
1072    into a bit-field within structure STR_RTX
1073    containing BITSIZE bits starting at bit BITNUM.
1074
1075    BITREGION_START is bitpos of the first bitfield in this region.
1076    BITREGION_END is the bitpos of the ending bitfield in this region.
1077    These two fields are 0, if the C++ memory model does not apply,
1078    or we are not interested in keeping track of bitfield regions.
1079
1080    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1081
1082    If REVERSE is true, the store is to be done in reverse order.  */
1083
1084 void
1085 store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1086                  poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1087                  machine_mode fieldmode,
1088                  rtx value, bool reverse)
1089 {
1090   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
       /* This path is only considered when BITSIZE and BITNUM are
          compile-time constants and FIELDMODE is a scalar integer mode.  */
1091   unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1092   scalar_int_mode int_mode;
1093   if (bitsize.is_constant (&ibitsize)
1094       && bitnum.is_constant (&ibitnum)
1095       && is_a <scalar_int_mode> (fieldmode, &int_mode)
1096       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1097                                      bitregion_start, bitregion_end))
1098     {
1099       /* Storing of a full word can be done with a simple store.
1100          We know here that the field can be accessed with one single
1101          instruction.  For targets that support unaligned memory,
1102          an unaligned access may be necessary.  */
1103       if (ibitsize == GET_MODE_BITSIZE (int_mode))
1104         {
1105           str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1106                                              ibitnum / BITS_PER_UNIT);
1107           if (reverse)
1108             value = flip_storage_order (int_mode, value);
               /* A field that fills INT_MODE is expected to start on a
                  byte boundary.  */
1109           gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1110           emit_move_insn (str_rtx, value);
1111         }
1112       else
1113         {
1114           rtx temp;
1115
               /* Read-modify-write: narrow the reference to an INT_MODE
                  access (IBITNUM is updated through the last argument),
                  load it into a pseudo, insert the field there with the
                  bit region arguments passed as 0, and store the pseudo
                  back.  */
1116           str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1117                                           ibitnum, &ibitnum);
1118           gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1119           temp = copy_to_reg (str_rtx);
1120           if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1121                                   int_mode, value, reverse, true))
1122             gcc_unreachable ();
1123
1124           emit_move_insn (str_rtx, temp);
1125         }
1126
1127       return;
1128     }
1129
1130   /* Under the C++0x memory model, we must not touch bits outside the
1131      bit region.  Adjust the address to start at the beginning of the
1132      bit region.  */
1133   if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1134     {
1135       scalar_int_mode best_mode;
1136       machine_mode addr_mode = VOIDmode;
1137
           /* OFFSET is the start of the region in bytes.  BITNUM and
              BITREGION_END are rebased to count from the region start, and
              SIZE is the number of bytes from the region start to the end
              of the field, rounded up.  */
1138       poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1139       bitnum -= bitregion_start;
1140       poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1141       bitregion_end -= bitregion_start;
1142       bitregion_start = 0;
           /* ADDR_MODE stays VOIDmode unless the size and position are
              constant and get_best_mode finds a suitable mode.  */
1143       if (bitsize.is_constant (&ibitsize)
1144           && bitnum.is_constant (&ibitnum)
1145           && get_best_mode (ibitsize, ibitnum,
1146                             bitregion_start, bitregion_end,
1147                             MEM_ALIGN (str_rtx), INT_MAX,
1148                             MEM_VOLATILE_P (str_rtx), &best_mode))
1149         addr_mode = best_mode;
1150       str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1151                                               offset, size);
1152     }
1153
       /* With FALLBACK_P true the worker is expected always to succeed,
          so a false result is treated as unreachable.  */
1154   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1155                           bitregion_start, bitregion_end,
1156                           fieldmode, value, reverse, true))
1157     gcc_unreachable ();
1158 }
1159 \f
1160 /* Use shifts and boolean operations to store VALUE into a bit field of
1161    width BITSIZE in OP0, starting at bit BITNUM.  If OP0_MODE is defined,
1162    it is the mode of OP0, otherwise OP0 is a BLKmode MEM.  VALUE_MODE is
1163    the mode of VALUE.
1164
1165    If REVERSE is true, the store is to be done in reverse order.  */
1166
1167 static void
1168 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1169                        unsigned HOST_WIDE_INT bitsize,
1170                        unsigned HOST_WIDE_INT bitnum,
1171                        poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1172                        rtx value, scalar_int_mode value_mode, bool reverse)
1173 {
1174   /* There is a case not handled here:
1175      a structure with a known alignment of just a halfword
1176      and a field split across two aligned halfwords within the structure.
1177      Or likewise a structure with a known alignment of just a byte
1178      and a field split across two bytes.
1179      Such cases are not supposed to be able to occur.  */
1180
1181   scalar_int_mode best_mode;
1182   if (MEM_P (op0))
1183     {
           /* Limit the access width offered to get_best_mode to a word,
              or to OP0's own mode when that is known and narrower.  */
1184       unsigned int max_bitsize = BITS_PER_WORD;
1185       scalar_int_mode imode;
1186       if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1187         max_bitsize = GET_MODE_BITSIZE (imode);
1188
1189       if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1190                           MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1191                           &best_mode))
1192         {
1193           /* The only way this should occur is if the field spans word
1194              boundaries.  */
1195           store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1196                                  bitregion_start, bitregion_end,
1197                                  value, value_mode, reverse);
1198           return;
1199         }
1200
           /* Narrow the reference to a BEST_MODE access; BITNUM is updated
              through the last argument.  */
1201       op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1202     }
1203   else
         /* A non-MEM destination must have a known integer mode, which is
            used for the access as it stands.  */
1204     best_mode = op0_mode.require ();
1205
1206   store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1207                            value, value_mode, reverse);
1208 }
1209
1210 /* Helper function for store_fixed_bit_field, stores
1211    the bit field always using MODE, which is the mode of OP0.  The other
1212    arguments are as for store_fixed_bit_field.  */
1213
1214 static void
1215 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1216                          unsigned HOST_WIDE_INT bitsize,
1217                          unsigned HOST_WIDE_INT bitnum,
1218                          rtx value, scalar_int_mode value_mode, bool reverse)
1219 {
1220   rtx temp;
       /* ALL_ZERO and ALL_ONE are set when VALUE is a constant whose low
          BITSIZE bits are all zeros or all ones respectively.  They allow
          the IOR step or the AND step below to be skipped.  */
1221   int all_zero = 0;
1222   int all_one = 0;
1223
1224   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1225      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1226
1227   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1228     /* BITNUM is the distance between our msb
1229        and that of the containing datum.
1230        Convert it to the distance from the lsb.  */
1231     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1232
1233   /* Now BITNUM is always the distance between our lsb
1234      and that of OP0.  */
1235
1236   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1237      we must first convert its mode to MODE.  */
1238
1239   if (CONST_INT_P (value))
1240     {
1241       unsigned HOST_WIDE_INT v = UINTVAL (value);
1242
           /* Truncate the constant to BITSIZE bits.  The mask is only built
              when BITSIZE is less than the host wide int width.  */
1243       if (bitsize < HOST_BITS_PER_WIDE_INT)
1244         v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1245
1246       if (v == 0)
1247         all_zero = 1;
1248       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1249                 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1250                || (bitsize == HOST_BITS_PER_WIDE_INT
1251                    && v == HOST_WIDE_INT_M1U))
1252         all_one = 1;
1253
           /* Form the shifted constant directly rather than emitting a
              shift instruction.  */
1254       value = lshift_value (mode, v, bitnum);
1255     }
1256   else
1257     {
           /* VALUE must be masked to BITSIZE bits unless VALUE_MODE is
              exactly BITSIZE bits wide or the field ends at the top of
              MODE -- presumably because in the latter case the left shift
              below discards any excess high bits.  */
1258       int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1259                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1260
1261       if (value_mode != mode)
1262         value = convert_to_mode (mode, value, 1);
1263
1264       if (must_and)
1265         value = expand_binop (mode, and_optab, value,
1266                               mask_rtx (mode, 0, bitsize, 0),
1267                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1268       if (bitnum > 0)
1269         value = expand_shift (LSHIFT_EXPR, mode, value,
1270                               bitnum, NULL_RTX, 1);
1271     }
1272
1273   if (reverse)
1274     value = flip_storage_order (mode, value);
1275
1276   /* Now clear the chosen bits in OP0,
1277      except that if VALUE is -1 we need not bother.  */
1278   /* We keep the intermediates in registers to allow CSE to combine
1279      consecutive bitfield assignments.  */
1280
1281   temp = force_reg (mode, op0);
1282
1283   if (! all_one)
1284     {
           /* The mask is built for the field at BITNUM/BITSIZE and is
              storage-order flipped in the same way as VALUE when REVERSE
              is set.  */
1285       rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1286       if (reverse)
1287         mask = flip_storage_order (mode, mask);
1288       temp = expand_binop (mode, and_optab, temp, mask,
1289                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1290       temp = force_reg (mode, temp);
1291     }
1292
1293   /* Now logical-or VALUE into OP0, unless it is zero.  */
1294
1295   if (! all_zero)
1296     {
1297       temp = expand_binop (mode, ior_optab, temp, value,
1298                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1299       temp = force_reg (mode, temp);
1300     }
1301
       /* Write the result back unless TEMP is still the very same rtx as
          OP0.  The destination is passed through copy_rtx before the
          move.  */
1302   if (op0 != temp)
1303     {
1304       op0 = copy_rtx (op0);
1305       emit_move_insn (op0, temp);
1306     }
1307 }
1308 \f
1309 /* Store a bit field that is split across multiple accessible memory objects.
1310
1311    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1312    BITSIZE is the field width; BITPOS the position of its first bit
1313    (within the word).
1314    VALUE is the value to store, which has mode VALUE_MODE.
1315    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1316    a BLKmode MEM.
1317
1318    If REVERSE is true, the store is to be done in reverse order.
1319
1320    This does not yet handle fields wider than BITS_PER_WORD.  */
1321
1322 static void
1323 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1324                        unsigned HOST_WIDE_INT bitsize,
1325                        unsigned HOST_WIDE_INT bitpos,
1326                        poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1327                        rtx value, scalar_int_mode value_mode, bool reverse)
1328 {
       /* UNIT is the width in bits of the pieces OP0 is accessed in,
          TOTAL_BITS is the width of VALUE_MODE, and BITSDONE counts the
          bits of the field stored so far.  */
1329   unsigned int unit, total_bits, bitsdone = 0;
1330
1331   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1332      much at a time.  */
1333   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1334     unit = BITS_PER_WORD;
1335   else
1336     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1337
1338   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1339      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1340      again, and we will mutually recurse forever.  */
1341   if (MEM_P (op0) && op0_mode.exists ())
1342     unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1343
1344   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1345      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1346      that VALUE might be a floating-point constant.  */
1347   if (CONSTANT_P (value) && !CONST_INT_P (value))
1348     {
1349       rtx word = gen_lowpart_common (word_mode, value);
1350
1351       if (word && (value != word))
1352         value = word;
1353       else
1354         value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1355       value_mode = word_mode;
1356     }
1357
1358   total_bits = GET_MODE_BITSIZE (value_mode);
1359
       /* Store the field one piece at a time until all BITSIZE bits have
          been written.  */
1360   while (bitsdone < bitsize)
1361     {
1362       unsigned HOST_WIDE_INT thissize;
1363       unsigned HOST_WIDE_INT thispos;
1364       unsigned HOST_WIDE_INT offset;
1365       rtx part;
1366
           /* OFFSET is the index of the UNIT holding the next bit to be
              stored and THISPOS is that bit's position within the unit.  */
1367       offset = (bitpos + bitsdone) / unit;
1368       thispos = (bitpos + bitsdone) % unit;
1369
1370       /* When region of bytes we can touch is restricted, decrease
1371          UNIT close to the end of the region as needed.  If op0 is a REG
1372          or SUBREG of REG, don't do this, as there can't be data races
1373          on a register and we can expand shorter code in some cases.  */
1374       if (maybe_ne (bitregion_end, 0U)
1375           && unit > BITS_PER_UNIT
1376           && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1377           && !REG_P (op0)
1378           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1379         {
               /* Halve UNIT and restart the iteration so that OFFSET and
                  THISPOS are recomputed for the smaller unit.  */
1380           unit = unit / 2;
1381           continue;
1382         }
1383
1384       /* THISSIZE must not overrun a word boundary.  Otherwise,
1385          store_fixed_bit_field will call us again, and we will mutually
1386          recurse forever.  */
1387       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1388       thissize = MIN (thissize, unit - thispos);
1389
           /* PART becomes the THISSIZE bits of VALUE that belong in this
              piece.  A CONST_INT is sliced with host arithmetic; any other
              value is sliced with extract_fixed_bit_field.  */
1390       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1391         {
1392           /* Fetch successively less significant portions.  */
1393           if (CONST_INT_P (value))
1394             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1395                              >> (bitsize - bitsdone - thissize))
1396                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1397           /* Likewise, but the source is little-endian.  */
1398           else if (reverse)
1399             part = extract_fixed_bit_field (word_mode, value, value_mode,
1400                                             thissize,
1401                                             bitsize - bitsdone - thissize,
1402                                             NULL_RTX, 1, false);
1403           else
1404             /* The args are chosen so that the last part includes the
1405                lsb.  Give extract_bit_field the value it needs (with
1406                endianness compensation) to fetch the piece we want.  */
1407             part = extract_fixed_bit_field (word_mode, value, value_mode,
1408                                             thissize,
1409                                             total_bits - bitsize + bitsdone,
1410                                             NULL_RTX, 1, false);
1411         }
1412       else
1413         {
1414           /* Fetch successively more significant portions.  */
1415           if (CONST_INT_P (value))
1416             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1417                              >> bitsdone)
1418                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1419           /* Likewise, but the source is big-endian.  */
1420           else if (reverse)
1421             part = extract_fixed_bit_field (word_mode, value, value_mode,
1422                                             thissize,
1423                                             total_bits - bitsdone - thissize,
1424                                             NULL_RTX, 1, false);
1425           else
1426             part = extract_fixed_bit_field (word_mode, value, value_mode,
1427                                             thissize, bitsdone, NULL_RTX,
1428                                             1, false);
1429         }
1430
1431       /* If OP0 is a register, then handle OFFSET here.  */
1432       rtx op0_piece = op0;
1433       opt_scalar_int_mode op0_piece_mode = op0_mode;
1434       if (SUBREG_P (op0) || REG_P (op0))
1435         {
1436           scalar_int_mode imode;
1437           if (op0_mode.exists (&imode)
1438               && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1439             {
                   /* A register narrower than a word is used as it is.  A
                      nonzero OFFSET then lies outside it, which is flagged
                      by using const0_rtx as a sentinel.  */
1440               if (offset)
1441                 op0_piece = const0_rtx;
1442             }
1443           else
1444             {
                   /* Otherwise select the word of OP0 that contains this
                      piece.  */
1445               op0_piece = operand_subword_force (op0,
1446                                                  offset * unit / BITS_PER_WORD,
1447                                                  GET_MODE (op0));
1448               op0_piece_mode = word_mode;
1449             }
               /* Reduce OFFSET to a unit index within a single word
                  (this relies on BITS_PER_WORD / UNIT being a power of
                  two).  */
1450           offset &= BITS_PER_WORD / unit - 1;
1451         }
1452
1453       /* OFFSET is in UNITs, and UNIT is in bits.  If OP0_PIECE is const0_rtx,
1454          it is just an out-of-bounds access.  Ignore it.  */
1455       if (op0_piece != const0_rtx)
1456         store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1457                                offset * unit + thispos, bitregion_start,
1458                                bitregion_end, part, word_mode, reverse);
1459       bitsdone += thissize;
1460     }
1461 }
1462 \f
1463 /* A subroutine of extract_bit_field_1 that converts return value X
1464    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1465    to extract_bit_field.  */
1466
1467 static rtx
1468 convert_extracted_bit_field (rtx x, machine_mode mode,
1469                              machine_mode tmode, bool unsignedp)
1470 {
       /* Nothing to do if X already has one of the two acceptable modes.  */
1471   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1472     return x;
1473
1474   /* If TMODE is not a scalar integer mode, first convert X to the
1475      integer mode of TMODE's size, force it into a register and then
1476      return its lowpart viewed in TMODE.  */
1477   if (!SCALAR_INT_MODE_P (tmode))
1478     {
1479       scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1480       x = convert_to_mode (int_mode, x, unsignedp);
1481       x = force_reg (int_mode, x);
1482       return gen_lowpart (tmode, x);
1483     }
1484
       /* Otherwise an ordinary integer conversion to TMODE suffices.  */
1485   return convert_to_mode (tmode, x, unsignedp);
1486 }
1487
1488 /* Try to use an ext(z)v pattern to extract a field from OP0.
1489    Return the extracted value on success, otherwise return null.
1490    EXTV describes the extraction instruction to use.  If OP0_MODE
1491    is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1492    The other arguments are as for extract_bit_field.  */
1493
1494 static rtx
1495 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1496                               opt_scalar_int_mode op0_mode,
1497                               unsigned HOST_WIDE_INT bitsize,
1498                               unsigned HOST_WIDE_INT bitnum,
1499                               int unsignedp, rtx target,
1500                               machine_mode mode, machine_mode tmode)
1501 {
1502   struct expand_operand ops[4];
1503   rtx spec_target = target;
1504   rtx spec_target_subreg = 0;
1505   scalar_int_mode ext_mode = extv->field_mode;
1506   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1507
1508   if (bitsize == 0 || unit < bitsize)
1509     return NULL_RTX;
1510
1511   if (MEM_P (op0))
1512     /* Get a reference to the first byte of the field.  */
1513     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1514                                 &bitnum);
1515   else
1516     {
1517       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1518       if (BYTES_BIG_ENDIAN)
1519         bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1520
1521       /* If op0 is a register, we need it in EXT_MODE to make it
1522          acceptable to the format of ext(z)v.  */
1523       if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1524         return NULL_RTX;
1525       if (REG_P (op0) && op0_mode.require () != ext_mode)
1526         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1527     }
1528
1529   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1530      "backwards" from the size of the unit we are extracting from.
1531      Otherwise, we count bits from the most significant on a
1532      BYTES/BITS_BIG_ENDIAN machine.  */
1533
1534   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1535     bitnum = unit - bitsize - bitnum;
1536
1537   if (target == 0)
1538     target = spec_target = gen_reg_rtx (tmode);
1539
1540   if (GET_MODE (target) != ext_mode)
1541     {
1542       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1543          between the mode of the extraction (word_mode) and the target
1544          mode.  Instead, create a temporary and use convert_move to set
1545          the target.  */
1546       if (REG_P (target)
1547           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1548         {
1549           target = gen_lowpart (ext_mode, target);
1550           if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1551             spec_target_subreg = target;
1552         }
1553       else
1554         target = gen_reg_rtx (ext_mode);
1555     }
1556
1557   create_output_operand (&ops[0], target, ext_mode);
1558   create_fixed_operand (&ops[1], op0);
1559   create_integer_operand (&ops[2], bitsize);
1560   create_integer_operand (&ops[3], bitnum);
1561   if (maybe_expand_insn (extv->icode, 4, ops))
1562     {
1563       target = ops[0].value;
1564       if (target == spec_target)
1565         return target;
1566       if (target == spec_target_subreg)
1567         return spec_target;
1568       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1569     }
1570   return NULL_RTX;
1571 }
1572
1573 /* See whether it would be valid to extract the part of OP0 described
1574    by BITNUM and BITSIZE into a value of mode MODE using a subreg
1575    operation.  Return the subreg if so, otherwise return null.  */
1576
1577 static rtx
1578 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1579                              poly_uint64 bitsize, poly_uint64 bitnum)
1580 {
1581   poly_uint64 bytenum;
1582   if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1583       && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1584       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1585       && TRULY_NOOP_TRUNCATION_MODES_P (mode, GET_MODE (op0)))
1586     return simplify_gen_subreg (mode, op0, GET_MODE (op0), bytenum);
1587   return NULL_RTX;
1588 }
1589
1590 /* A subroutine of extract_bit_field, with the same arguments.
1591    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1592    if we can find no other means of implementing the operation.
1593    if FALLBACK_P is false, return NULL instead.  */
1594
1595 static rtx
1596 extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1597                      int unsignedp, rtx target, machine_mode mode,
1598                      machine_mode tmode, bool reverse, bool fallback_p,
1599                      rtx *alt_rtl)
1600 {
1601   rtx op0 = str_rtx;
1602   machine_mode mode1;
1603
1604   if (tmode == VOIDmode)
1605     tmode = mode;
1606
1607   while (GET_CODE (op0) == SUBREG)
1608     {
1609       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1610       op0 = SUBREG_REG (op0);
1611     }
1612
1613   /* If we have an out-of-bounds access to a register, just return an
1614      uninitialized register of the required mode.  This can occur if the
1615      source code contains an out-of-bounds access to a small array.  */
1616   if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1617     return gen_reg_rtx (tmode);
1618
1619   if (REG_P (op0)
1620       && mode == GET_MODE (op0)
1621       && known_eq (bitnum, 0U)
1622       && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1623     {
1624       if (reverse)
1625         op0 = flip_storage_order (mode, op0);
1626       /* We're trying to extract a full register from itself.  */
1627       return op0;
1628     }
1629
1630   /* First try to check for vector from vector extractions.  */
1631   if (VECTOR_MODE_P (GET_MODE (op0))
1632       && !MEM_P (op0)
1633       && VECTOR_MODE_P (tmode)
1634       && known_eq (bitsize, GET_MODE_SIZE (tmode))
1635       && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1636     {
1637       machine_mode new_mode = GET_MODE (op0);
1638       if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1639         {
1640           scalar_mode inner_mode = GET_MODE_INNER (tmode);
1641           poly_uint64 nunits;
1642           if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1643                            GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1644               || !mode_for_vector (inner_mode, nunits).exists (&new_mode)
1645               || !VECTOR_MODE_P (new_mode)
1646               || maybe_ne (GET_MODE_SIZE (new_mode),
1647                            GET_MODE_SIZE (GET_MODE (op0)))
1648               || GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)
1649               || !targetm.vector_mode_supported_p (new_mode))
1650             new_mode = VOIDmode;
1651         }
1652       poly_uint64 pos;
1653       if (new_mode != VOIDmode
1654           && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1655               != CODE_FOR_nothing)
1656           && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1657         {
1658           struct expand_operand ops[3];
1659           machine_mode outermode = new_mode;
1660           machine_mode innermode = tmode;
1661           enum insn_code icode
1662             = convert_optab_handler (vec_extract_optab, outermode, innermode);
1663
1664           if (new_mode != GET_MODE (op0))
1665             op0 = gen_lowpart (new_mode, op0);
1666           create_output_operand (&ops[0], target, innermode);
1667           ops[0].target = 1;
1668           create_input_operand (&ops[1], op0, outermode);
1669           create_integer_operand (&ops[2], pos);
1670           if (maybe_expand_insn (icode, 3, ops))
1671             {
1672               if (alt_rtl && ops[0].target)
1673                 *alt_rtl = target;
1674               target = ops[0].value;
1675               if (GET_MODE (target) != mode)
1676                 return gen_lowpart (tmode, target);
1677               return target;
1678             }
1679         }
1680     }
1681
1682   /* See if we can get a better vector mode before extracting.  */
1683   if (VECTOR_MODE_P (GET_MODE (op0))
1684       && !MEM_P (op0)
1685       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1686     {
1687       machine_mode new_mode;
1688
1689       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1690         new_mode = MIN_MODE_VECTOR_FLOAT;
1691       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1692         new_mode = MIN_MODE_VECTOR_FRACT;
1693       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1694         new_mode = MIN_MODE_VECTOR_UFRACT;
1695       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1696         new_mode = MIN_MODE_VECTOR_ACCUM;
1697       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1698         new_mode = MIN_MODE_VECTOR_UACCUM;
1699       else
1700         new_mode = MIN_MODE_VECTOR_INT;
1701
1702       FOR_EACH_MODE_FROM (new_mode, new_mode)
1703         if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1704             && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1705             && targetm.vector_mode_supported_p (new_mode))
1706           break;
1707       if (new_mode != VOIDmode)
1708         op0 = gen_lowpart (new_mode, op0);
1709     }
1710
1711   /* Use vec_extract patterns for extracting parts of vectors whenever
1712      available.  If that fails, see whether the current modes and bitregion
1713      give a natural subreg.  */
1714   machine_mode outermode = GET_MODE (op0);
1715   if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1716     {
1717       scalar_mode innermode = GET_MODE_INNER (outermode);
1718       enum insn_code icode
1719         = convert_optab_handler (vec_extract_optab, outermode, innermode);
1720       poly_uint64 pos;
1721       if (icode != CODE_FOR_nothing
1722           && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
1723           && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
1724         {
1725           struct expand_operand ops[3];
1726
1727           create_output_operand (&ops[0], target, innermode);
1728           ops[0].target = 1;
1729           create_input_operand (&ops[1], op0, outermode);
1730           create_integer_operand (&ops[2], pos);
1731           if (maybe_expand_insn (icode, 3, ops))
1732             {
1733               if (alt_rtl && ops[0].target)
1734                 *alt_rtl = target;
1735               target = ops[0].value;
1736               if (GET_MODE (target) != mode)
1737                 return gen_lowpart (tmode, target);
1738               return target;
1739             }
1740         }
1741       /* Using subregs is useful if we're extracting the least-significant
1742          vector element, or if we're extracting one register vector from
1743          a multi-register vector.  extract_bit_field_as_subreg checks
1744          for valid bitsize and bitnum, so we don't need to do that here.
1745
1746          The mode check makes sure that we're extracting either
1747          a single element or a subvector with the same element type.
1748          If the modes aren't such a natural fit, fall through and
1749          bitcast to integers first.  */
1750       if (GET_MODE_INNER (mode) == innermode)
1751         {
1752           rtx sub = extract_bit_field_as_subreg (mode, op0, bitsize, bitnum);
1753           if (sub)
1754             return sub;
1755         }
1756     }
1757
1758   /* Make sure we are playing with integral modes.  Pun with subregs
1759      if we aren't.  */
1760   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1761   scalar_int_mode imode;
1762   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1763     {
1764       if (MEM_P (op0))
1765         op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1766                                             0, MEM_SIZE (op0));
1767       else if (op0_mode.exists (&imode))
1768         {
1769           op0 = gen_lowpart (imode, op0);
1770
1771           /* If we got a SUBREG, force it into a register since we
1772              aren't going to be able to do another SUBREG on it.  */
1773           if (GET_CODE (op0) == SUBREG)
1774             op0 = force_reg (imode, op0);
1775         }
1776       else
1777         {
1778           poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1779           rtx mem = assign_stack_temp (GET_MODE (op0), size);
1780           emit_move_insn (mem, op0);
1781           op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1782         }
1783     }
1784
1785   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1786      If that's wrong, the solution is to test for it and set TARGET to 0
1787      if needed.  */
1788
1789   /* Get the mode of the field to use for atomic access or subreg
1790      conversion.  */
1791   if (!SCALAR_INT_MODE_P (tmode)
1792       || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1793     mode1 = mode;
1794   gcc_assert (mode1 != BLKmode);
1795
1796   /* Extraction of a full MODE1 value can be done with a subreg as long
1797      as the least significant bit of the value is the least significant
1798      bit of either OP0 or a word of OP0.  */
1799   if (!MEM_P (op0) && !reverse)
1800     {
1801       rtx sub = extract_bit_field_as_subreg (mode1, op0, bitsize, bitnum);
1802       if (sub)
1803         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1804     }
1805
1806   /* Extraction of a full MODE1 value can be done with a load as long as
1807      the field is on a byte boundary and is sufficiently aligned.  */
1808   poly_uint64 bytenum;
1809   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1810     {
1811       op0 = adjust_bitfield_address (op0, mode1, bytenum);
1812       if (reverse)
1813         op0 = flip_storage_order (mode1, op0);
1814       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1815     }
1816
1817   /* If we have a memory source and a non-constant bit offset, restrict
1818      the memory to the referenced bytes.  This is a worst-case fallback
1819      but is useful for things like vector booleans.  */
1820   if (MEM_P (op0) && !bitnum.is_constant ())
1821     {
1822       bytenum = bits_to_bytes_round_down (bitnum);
1823       bitnum = num_trailing_bits (bitnum);
1824       poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1825       op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1826       op0_mode = opt_scalar_int_mode ();
1827     }
1828
1829   /* It's possible we'll need to handle other cases here for
1830      polynomial bitnum and bitsize.  */
1831
1832   /* From here on we need to be looking at a fixed-size insertion.  */
1833   return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1834                                      bitnum.to_constant (), unsignedp,
1835                                      target, mode, tmode, reverse, fallback_p);
1836 }
1837
1838 /* Subroutine of extract_bit_field_1, with the same arguments, except
1839    that BITSIZE and BITNUM are constant.  Handle cases specific to
1840    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
1841    otherwise OP0 is a BLKmode MEM.  */
1842
1843 static rtx
1844 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1845                             unsigned HOST_WIDE_INT bitsize,
1846                             unsigned HOST_WIDE_INT bitnum, int unsignedp,
1847                             rtx target, machine_mode mode, machine_mode tmode,
1848                             bool reverse, bool fallback_p)
1849 {
1850   /* Handle fields bigger than a word.  */
1851
1852   if (bitsize > BITS_PER_WORD)
1853     {
1854       /* Here we transfer the words of the field
1855          in the order least significant first.
1856          This is because the most significant word is the one which may
1857          be less than full.  */
1858
1859       const bool backwards = WORDS_BIG_ENDIAN;
1860       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1861       unsigned int i;
1862       rtx_insn *last;
1863
1864       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1865         target = gen_reg_rtx (mode);
1866
1867       /* In case we're about to clobber a base register or something
1868          (see gcc.c-torture/execute/20040625-1.c).   */
1869       if (reg_mentioned_p (target, op0))
1870         target = gen_reg_rtx (mode);
1871
1872       /* Indicate for flow that the entire target reg is being set.  */
1873       emit_clobber (target);
1874
1875       /* The mode must be fixed-size, since extract_bit_field_1 handles
1876          extractions from variable-sized objects before calling this
1877          function.  */
1878       unsigned int target_size
1879         = GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1880       last = get_last_insn ();
1881       for (i = 0; i < nwords; i++)
1882         {
1883           /* If I is 0, use the low-order word in both field and target;
1884              if I is 1, use the next to lowest word; and so on.  */
1885           /* Word number in TARGET to use.  */
1886           unsigned int wordnum
1887             = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1888           /* Offset from start of field in OP0.  */
1889           unsigned int bit_offset = (backwards ^ reverse
1890                                      ? MAX ((int) bitsize - ((int) i + 1)
1891                                             * BITS_PER_WORD,
1892                                             0)
1893                                      : (int) i * BITS_PER_WORD);
1894           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1895           rtx result_part
1896             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1897                                              bitsize - i * BITS_PER_WORD),
1898                                    bitnum + bit_offset, 1, target_part,
1899                                    mode, word_mode, reverse, fallback_p, NULL);
1900
1901           gcc_assert (target_part);
1902           if (!result_part)
1903             {
1904               delete_insns_since (last);
1905               return NULL;
1906             }
1907
1908           if (result_part != target_part)
1909             emit_move_insn (target_part, result_part);
1910         }
1911
1912       if (unsignedp)
1913         {
1914           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1915              need to be zero'd out.  */
1916           if (target_size > nwords * UNITS_PER_WORD)
1917             {
1918               unsigned int i, total_words;
1919
1920               total_words = target_size / UNITS_PER_WORD;
1921               for (i = nwords; i < total_words; i++)
1922                 emit_move_insn
1923                   (operand_subword (target,
1924                                     backwards ? total_words - i - 1 : i,
1925                                     1, VOIDmode),
1926                    const0_rtx);
1927             }
1928           return target;
1929         }
1930
1931       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1932       target = expand_shift (LSHIFT_EXPR, mode, target,
1933                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1934       return expand_shift (RSHIFT_EXPR, mode, target,
1935                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1936     }
1937
1938   /* If OP0 is a multi-word register, narrow it to the affected word.
1939      If the region spans two words, defer to extract_split_bit_field.  */
1940   if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1941     {
1942       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1943         {
1944           if (!fallback_p)
1945             return NULL_RTX;
1946           target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1947                                             unsignedp, reverse);
1948           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1949         }
1950       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1951                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1952       op0_mode = word_mode;
1953       bitnum %= BITS_PER_WORD;
1954     }
1955
1956   /* From here on we know the desired field is smaller than a word.
1957      If OP0 is a register, it too fits within a word.  */
1958   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1959   extraction_insn extv;
1960   if (!MEM_P (op0)
1961       && !reverse
1962       /* ??? We could limit the structure size to the part of OP0 that
1963          contains the field, with appropriate checks for endianness
1964          and TARGET_TRULY_NOOP_TRUNCATION.  */
1965       && get_best_reg_extraction_insn (&extv, pattern,
1966                                        GET_MODE_BITSIZE (op0_mode.require ()),
1967                                        tmode))
1968     {
1969       rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1970                                                  bitsize, bitnum,
1971                                                  unsignedp, target, mode,
1972                                                  tmode);
1973       if (result)
1974         return result;
1975     }
1976
1977   /* If OP0 is a memory, try copying it to a register and seeing if a
1978      cheap register alternative is available.  */
1979   if (MEM_P (op0) & !reverse)
1980     {
1981       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1982                                         tmode))
1983         {
1984           rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1985                                                      bitsize, bitnum,
1986                                                      unsignedp, target, mode,
1987                                                      tmode);
1988           if (result)
1989             return result;
1990         }
1991
1992       rtx_insn *last = get_last_insn ();
1993
1994       /* Try loading part of OP0 into a register and extracting the
1995          bitfield from that.  */
1996       unsigned HOST_WIDE_INT bitpos;
1997       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1998                                                0, 0, tmode, &bitpos);
1999       if (xop0)
2000         {
2001           xop0 = copy_to_reg (xop0);
2002           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2003                                             unsignedp, target,
2004                                             mode, tmode, reverse, false, NULL);
2005           if (result)
2006             return result;
2007           delete_insns_since (last);
2008         }
2009     }
2010
2011   if (!fallback_p)
2012     return NULL;
2013
2014   /* Find a correspondingly-sized integer field, so we can apply
2015      shifts and masks to it.  */
2016   scalar_int_mode int_mode;
2017   if (!int_mode_for_mode (tmode).exists (&int_mode))
2018     /* If this fails, we should probably push op0 out to memory and then
2019        do a load.  */
2020     int_mode = int_mode_for_mode (mode).require ();
2021
2022   target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2023                                     bitnum, target, unsignedp, reverse);
2024
2025   /* Complex values must be reversed piecewise, so we need to undo the global
2026      reversal, convert to the complex mode and reverse again.  */
2027   if (reverse && COMPLEX_MODE_P (tmode))
2028     {
2029       target = flip_storage_order (int_mode, target);
2030       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2031       target = flip_storage_order (tmode, target);
2032     }
2033   else
2034     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2035
2036   return target;
2037 }
2038
2039 /* Generate code to extract a byte-field from STR_RTX
2040    containing BITSIZE bits, starting at BITNUM,
2041    and put it in TARGET if possible (if TARGET is nonzero).
2042    Regardless of TARGET, we return the rtx for where the value is placed.
2043
2044    STR_RTX is the structure containing the byte (a REG or MEM).
2045    UNSIGNEDP is nonzero if this is an unsigned bit field.
2046    MODE is the natural mode of the field value once extracted.
2047    TMODE is the mode the caller would like the value to have;
2048    but the value may be returned with type MODE instead.
2049
2050    If REVERSE is true, the extraction is to be done in reverse order.
2051
2052    If a TARGET is specified and we can store in it at no extra cost,
2053    we do so, and return TARGET.
2054    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2055    if they are equally easy.  */
2056
2057 rtx
2058 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2059                    int unsignedp, rtx target, machine_mode mode,
2060                    machine_mode tmode, bool reverse, rtx *alt_rtl)
2061 {
2062   machine_mode mode1;
2063
2064   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
2065   if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2066     mode1 = GET_MODE (str_rtx);
2067   else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2068     mode1 = GET_MODE (target);
2069   else
2070     mode1 = tmode;
2071
2072   unsigned HOST_WIDE_INT ibitsize, ibitnum;
2073   scalar_int_mode int_mode;
2074   if (bitsize.is_constant (&ibitsize)
2075       && bitnum.is_constant (&ibitnum)
2076       && is_a <scalar_int_mode> (mode1, &int_mode)
2077       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2078                                      int_mode, 0, 0))
2079     {
2080       /* Extraction of a full INT_MODE value can be done with a simple load.
2081          We know here that the field can be accessed with one single
2082          instruction.  For targets that support unaligned memory,
2083          an unaligned access may be necessary.  */
2084       if (ibitsize == GET_MODE_BITSIZE (int_mode))
2085         {
2086           rtx result = adjust_bitfield_address (str_rtx, int_mode,
2087                                                 ibitnum / BITS_PER_UNIT);
2088           if (reverse)
2089             result = flip_storage_order (int_mode, result);
2090           gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2091           return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2092         }
2093
2094       str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2095                                       &ibitnum);
2096       gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2097       str_rtx = copy_to_reg (str_rtx);
2098       return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2099                                   target, mode, tmode, reverse, true, alt_rtl);
2100     }
2101
2102   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2103                               target, mode, tmode, reverse, true, alt_rtl);
2104 }
2105 \f
2106 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2107    from bit BITNUM of OP0.  If OP0_MODE is defined, it is the mode of OP0,
2108    otherwise OP0 is a BLKmode MEM.
2109
2110    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2111    If REVERSE is true, the extraction is to be done in reverse order.
2112
2113    If TARGET is nonzero, attempts to store the value there
2114    and return TARGET, but this is not guaranteed.
2115    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
2116
2117 static rtx
2118 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2119                          opt_scalar_int_mode op0_mode,
2120                          unsigned HOST_WIDE_INT bitsize,
2121                          unsigned HOST_WIDE_INT bitnum, rtx target,
2122                          int unsignedp, bool reverse)
2123 {
2124   scalar_int_mode mode;
2125   if (MEM_P (op0))
2126     {
2127       if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2128                           BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2129         /* The only way this should occur is if the field spans word
2130            boundaries.  */
2131         return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2132                                         unsignedp, reverse);
2133
2134       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2135     }
2136   else
2137     mode = op0_mode.require ();
2138
2139   return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2140                                     target, unsignedp, reverse);
2141 }
2142
2143 /* Helper function for extract_fixed_bit_field, extracts
2144    the bit field always using MODE, which is the mode of OP0.
2145    The other arguments are as for extract_fixed_bit_field.  */
2146
2147 static rtx
2148 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2149                            unsigned HOST_WIDE_INT bitsize,
2150                            unsigned HOST_WIDE_INT bitnum, rtx target,
2151                            int unsignedp, bool reverse)
2152 {
2153   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2154      for invalid input, such as extract equivalent of f5 from
2155      gcc.dg/pr48335-2.c.  */
2156
2157   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2158     /* BITNUM is the distance between our msb and that of OP0.
2159        Convert it to the distance from the lsb.  */
2160     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2161
2162   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2163      We have reduced the big-endian case to the little-endian case.  */
2164   if (reverse)
2165     op0 = flip_storage_order (mode, op0);
2166
2167   if (unsignedp)
2168     {
2169       if (bitnum)
2170         {
2171           /* If the field does not already start at the lsb,
2172              shift it so it does.  */
2173           /* Maybe propagate the target for the shift.  */
2174           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2175           if (tmode != mode)
2176             subtarget = 0;
2177           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2178         }
2179       /* Convert the value to the desired mode.  TMODE must also be a
2180          scalar integer for this conversion to make sense, since we
2181          shouldn't reinterpret the bits.  */
2182       scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2183       if (mode != new_mode)
2184         op0 = convert_to_mode (new_mode, op0, 1);
2185
2186       /* Unless the msb of the field used to be the msb when we shifted,
2187          mask out the upper bits.  */
2188
2189       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2190         return expand_binop (new_mode, and_optab, op0,
2191                              mask_rtx (new_mode, 0, bitsize, 0),
2192                              target, 1, OPTAB_LIB_WIDEN);
2193       return op0;
2194     }
2195
2196   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2197      then arithmetic-shift its lsb to the lsb of the word.  */
2198   op0 = force_reg (mode, op0);
2199
2200   /* Find the narrowest integer mode that contains the field.  */
2201
2202   opt_scalar_int_mode mode_iter;
2203   FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2204     if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2205       break;
2206
2207   mode = mode_iter.require ();
2208   op0 = convert_to_mode (mode, op0, 0);
2209
2210   if (mode != tmode)
2211     target = 0;
2212
2213   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2214     {
2215       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2216       /* Maybe propagate the target for the shift.  */
2217       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2218       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2219     }
2220
2221   return expand_shift (RSHIFT_EXPR, mode, op0,
2222                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2223 }
2224
2225 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2226    VALUE << BITPOS.  */
2227
2228 static rtx
2229 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2230               int bitpos)
2231 {
2232   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2233 }
2234 \f
2235 /* Extract a bit field that is split across two words
2236    and return an RTX for the result.
2237
2238    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2239    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2240    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2241    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2242    a BLKmode MEM.
2243
2244    If REVERSE is true, the extraction is to be done in reverse order.  */
2245
2246 static rtx
2247 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2248                          unsigned HOST_WIDE_INT bitsize,
2249                          unsigned HOST_WIDE_INT bitpos, int unsignedp,
2250                          bool reverse)
2251 {
2252   unsigned int unit;
2253   unsigned int bitsdone = 0;
2254   rtx result = NULL_RTX;
2255   int first = 1;
2256
2257   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2258      much at a time.  */
2259   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2260     unit = BITS_PER_WORD;
2261   else
2262     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2263
2264   while (bitsdone < bitsize)
2265     {
2266       unsigned HOST_WIDE_INT thissize;
2267       rtx part;
2268       unsigned HOST_WIDE_INT thispos;
2269       unsigned HOST_WIDE_INT offset;
2270
2271       offset = (bitpos + bitsdone) / unit;
2272       thispos = (bitpos + bitsdone) % unit;
2273
2274       /* THISSIZE must not overrun a word boundary.  Otherwise,
2275          extract_fixed_bit_field will call us again, and we will mutually
2276          recurse forever.  */
2277       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2278       thissize = MIN (thissize, unit - thispos);
2279
2280       /* If OP0 is a register, then handle OFFSET here.  */
2281       rtx op0_piece = op0;
2282       opt_scalar_int_mode op0_piece_mode = op0_mode;
2283       if (SUBREG_P (op0) || REG_P (op0))
2284         {
2285           op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2286           op0_piece_mode = word_mode;
2287           offset = 0;
2288         }
2289
2290       /* Extract the parts in bit-counting order,
2291          whose meaning is determined by BYTES_PER_UNIT.
2292          OFFSET is in UNITs, and UNIT is in bits.  */
2293       part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2294                                       thissize, offset * unit + thispos,
2295                                       0, 1, reverse);
2296       bitsdone += thissize;
2297
2298       /* Shift this part into place for the result.  */
2299       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2300         {
2301           if (bitsize != bitsdone)
2302             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2303                                  bitsize - bitsdone, 0, 1);
2304         }
2305       else
2306         {
2307           if (bitsdone != thissize)
2308             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2309                                  bitsdone - thissize, 0, 1);
2310         }
2311
2312       if (first)
2313         result = part;
2314       else
2315         /* Combine the parts with bitwise or.  This works
2316            because we extracted each part as an unsigned bit field.  */
2317         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2318                                OPTAB_LIB_WIDEN);
2319
2320       first = 0;
2321     }
2322
2323   /* Unsigned bit field: we are done.  */
2324   if (unsignedp)
2325     return result;
2326   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2327   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2328                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2329   return expand_shift (RSHIFT_EXPR, word_mode, result,
2330                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2331 }
2332 \f
2333 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2334    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2335    MODE, fill the upper bits with zeros.  Fail if the layout of either
2336    mode is unknown (as for CC modes) or if the extraction would involve
2337    unprofitable mode punning.  Return the value on success, otherwise
2338    return null.
2339
2340    This is different from gen_lowpart* in these respects:
2341
2342      - the returned value must always be considered an rvalue
2343
2344      - when MODE is wider than SRC_MODE, the extraction involves
2345        a zero extension
2346
2347      - when MODE is smaller than SRC_MODE, the extraction involves
2348        a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2349
2350    In other words, this routine performs a computation, whereas the
2351    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2352    operations.  */
2353
2354 rtx
2355 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2356 {
2357   scalar_int_mode int_mode, src_int_mode;
2358
2359   if (mode == src_mode)
2360     return src;
2361
2362   if (CONSTANT_P (src))
2363     {
2364       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2365          fails, it will happily create (subreg (symbol_ref)) or similar
2366          invalid SUBREGs.  */
2367       poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2368       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2369       if (ret)
2370         return ret;
2371
2372       if (GET_MODE (src) == VOIDmode
2373           || !validate_subreg (mode, src_mode, src, byte))
2374         return NULL_RTX;
2375
2376       src = force_reg (GET_MODE (src), src);
2377       return gen_rtx_SUBREG (mode, src, byte);
2378     }
2379
2380   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2381     return NULL_RTX;
2382
2383   if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2384       && targetm.modes_tieable_p (mode, src_mode))
2385     {
2386       rtx x = gen_lowpart_common (mode, src);
2387       if (x)
2388         return x;
2389     }
2390
2391   if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2392       || !int_mode_for_mode (mode).exists (&int_mode))
2393     return NULL_RTX;
2394
2395   if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2396     return NULL_RTX;
2397   if (!targetm.modes_tieable_p (int_mode, mode))
2398     return NULL_RTX;
2399
2400   src = gen_lowpart (src_int_mode, src);
2401   src = convert_modes (int_mode, src_int_mode, src, true);
2402   src = gen_lowpart (mode, src);
2403   return src;
2404 }
2405 \f
2406 /* Add INC into TARGET.  */
2407
2408 void
2409 expand_inc (rtx target, rtx inc)
2410 {
2411   rtx value = expand_binop (GET_MODE (target), add_optab,
2412                             target, inc,
2413                             target, 0, OPTAB_LIB_WIDEN);
2414   if (value != target)
2415     emit_move_insn (target, value);
2416 }
2417
2418 /* Subtract DEC from TARGET.  */
2419
2420 void
2421 expand_dec (rtx target, rtx dec)
2422 {
2423   rtx value = expand_binop (GET_MODE (target), sub_optab,
2424                             target, dec,
2425                             target, 0, OPTAB_LIB_WIDEN);
2426   if (value != target)
2427     emit_move_insn (target, value);
2428 }
2429 \f
/* Output a shift instruction for expression code CODE,
   with SHIFTED being the rtx for the value to shift,
   and AMOUNT the rtx for the amount to shift by.
   CODE is a shift or rotate code (LSHIFT_EXPR, RSHIFT_EXPR,
   LROTATE_EXPR or RROTATE_EXPR).
   MODE is the mode of the operation; for bit-size checks its inner
   (element) mode is used, so vector modes are accepted.
   Store the result in the rtx TARGET, if that is convenient.
   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
   Return the rtx for where the value is.
   If that cannot be done, abort the compilation unless MAY_FAIL is true,
   in which case 0 is returned.  */

static rtx
expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
                rtx amount, rtx target, int unsignedp, bool may_fail = false)
{
  rtx op1, temp = 0;
  /* Classify CODE: LEFT is nonzero for left shifts/rotates, ROTATE is
     nonzero for rotates in either direction.  */
  int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
  int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
  /* Optabs to use; these are the scalar-amount variants and are replaced
     below when both the value and the amount are vectors.  */
  optab lshift_optab = ashl_optab;
  optab rshift_arith_optab = ashr_optab;
  optab rshift_uns_optab = lshr_optab;
  optab lrotate_optab = rotl_optab;
  optab rrotate_optab = rotr_optab;
  machine_mode op1_mode;
  scalar_mode scalar_mode = GET_MODE_INNER (mode);
  int attempt;
  bool speed = optimize_insn_for_speed_p ();

  op1 = amount;
  op1_mode = GET_MODE (op1);

  /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
     shift amount is a vector, use the vector/vector shift patterns.  */
  if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
    {
      lshift_optab = vashl_optab;
      rshift_arith_optab = vashr_optab;
      rshift_uns_optab = vlshr_optab;
      lrotate_optab = vrotl_optab;
      rrotate_optab = vrotr_optab;
    }

  /* Previously detected shift-counts computed by NEGATE_EXPR
     and shifted in the other direction; but that does not work
     on all machines.  */

  if (SHIFT_COUNT_TRUNCATED)
    {
      /* A constant amount that is at least the element bit size is
         reduced modulo that size; a lowpart SUBREG between scalar
         integer modes is replaced by its inner value.  */
      if (CONST_INT_P (op1)
          && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
              (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
        op1 = gen_int_shift_amount (mode,
                                    (unsigned HOST_WIDE_INT) INTVAL (op1)
                                    % GET_MODE_BITSIZE (scalar_mode));
      else if (GET_CODE (op1) == SUBREG
               && subreg_lowpart_p (op1)
               && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
               && SCALAR_INT_MODE_P (GET_MODE (op1)))
        op1 = SUBREG_REG (op1);
    }

  /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
     prefer left rotation, if op1 is from bitsize / 2 + 1 to
     bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
     amount instead.  */
  if (rotate
      && CONST_INT_P (op1)
      && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
                   GET_MODE_BITSIZE (scalar_mode) - 1))
    {
      op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
                                         - INTVAL (op1)));
      left = !left;
      code = left ? LROTATE_EXPR : RROTATE_EXPR;
    }

  /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
     Note that this is not the case for bigger values.  For instance a rotation
     of 0x01020304 by 16 bits gives 0x03040102 which is different from
     0x04030201 (bswapsi).  */
  if (rotate
      && CONST_INT_P (op1)
      && INTVAL (op1) == BITS_PER_UNIT
      && GET_MODE_SIZE (scalar_mode) == 2
      && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
    return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);

  /* Shifting or rotating by zero leaves the value unchanged.  */
  if (op1 == const0_rtx)
    return shifted;

  /* Check whether it's cheaper to implement a left shift by a constant
     bit count by a sequence of additions.  */
  if (code == LSHIFT_EXPR
      && CONST_INT_P (op1)
      && INTVAL (op1) > 0
      && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
      && INTVAL (op1) < MAX_BITS_PER_WORD
      && (shift_cost (speed, mode, INTVAL (op1))
          > INTVAL (op1) * add_cost (speed, mode))
      && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
    {
      int i;
      /* Each iteration doubles the value by adding it to itself, which
         is one left shift by a single bit.  */
      for (i = 0; i < INTVAL (op1); i++)
        {
          temp = force_reg (mode, shifted);
          shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
                                  unsignedp, OPTAB_LIB_WIDEN);
        }
      return shifted;
    }

  /* Try up to three strategies in turn -- OPTAB_DIRECT, OPTAB_WIDEN and
     OPTAB_LIB_WIDEN -- stopping as soon as one of them yields a result
     in TEMP.  */
  for (attempt = 0; temp == 0 && attempt < 3; attempt++)
    {
      enum optab_methods methods;

      if (attempt == 0)
        methods = OPTAB_DIRECT;
      else if (attempt == 1)
        methods = OPTAB_WIDEN;
      else
        methods = OPTAB_LIB_WIDEN;

      if (rotate)
        {
          /* Widening does not work for rotation.  */
          if (methods == OPTAB_WIDEN)
            continue;
          else if (methods == OPTAB_LIB_WIDEN)
            {
              /* If we have been unable to open-code this by a rotation,
                 do it as the IOR of two shifts.  I.e., to rotate A
                 by N bits, compute
                 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
                 where C is the bitsize of A.

                 It is theoretically possible that the target machine might
                 not be able to perform either shift and hence we would
                 be making two libcalls rather than just the one for the
                 shift (similarly if IOR could not be done).  We will allow
                 this extremely unlikely lossage to avoid complicating the
                 code below.  */

              /* Do not let the second shift write to TARGET when TARGET
                 is the very value being shifted.  */
              rtx subtarget = target == shifted ? 0 : target;
              rtx new_amount, other_amount;
              rtx temp1;

              /* NEW_AMOUNT is the amount for the shift in the rotate's
                 own direction; OTHER_AMOUNT is the complementary amount
                 for the shift in the opposite direction.  */
              new_amount = op1;
              if (op1 == const0_rtx)
                return shifted;
              else if (CONST_INT_P (op1))
                other_amount = gen_int_shift_amount
                  (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
              else
                {
                  /* Variable amount: compute (-OP1) & (precision - 1).  */
                  other_amount
                    = simplify_gen_unary (NEG, GET_MODE (op1),
                                          op1, GET_MODE (op1));
                  HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
                  other_amount
                    = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
                                           gen_int_mode (mask, GET_MODE (op1)));
                }

              shifted = force_reg (mode, shifted);

              /* Both halves are logical (unsigned) shifts.  */
              temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
                                     mode, shifted, new_amount, 0, 1);
              temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
                                      mode, shifted, other_amount,
                                      subtarget, 1);
              return expand_binop (mode, ior_optab, temp, temp1, target,
                                   unsignedp, methods);
            }

          temp = expand_binop (mode,
                               left ? lrotate_optab : rrotate_optab,
                               shifted, op1, target, unsignedp, methods);
        }
      else if (unsignedp)
        temp = expand_binop (mode,
                             left ? lshift_optab : rshift_uns_optab,
                             shifted, op1, target, unsignedp, methods);

      /* Do arithmetic shifts.
         Also, if we are going to widen the operand, we can just as well
         use an arithmetic right-shift instead of a logical one.  */
      if (temp == 0 && ! rotate
          && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
        {
          enum optab_methods methods1 = methods;

          /* If trying to widen a log shift to an arithmetic shift,
             don't accept an arithmetic shift of the same size.  */
          if (unsignedp)
            methods1 = OPTAB_MUST_WIDEN;

          /* Arithmetic shift */

          temp = expand_binop (mode,
                               left ? lshift_optab : rshift_arith_optab,
                               shifted, op1, target, unsignedp, methods1);
        }

      /* We used to try extzv here for logical right shifts, but that was
         only useful for one machine, the VAX, and caused poor code
         generation there for lshrdi3, so the code was deleted and a
         define_expand for lshrsi3 was added to vax.md.  */
    }

  /* Reaching here without a result is only acceptable when the caller
     passed MAY_FAIL.  */
  gcc_assert (temp != NULL_RTX || may_fail);
  return temp;
}
2640
2641 /* Output a shift instruction for expression code CODE,
2642    with SHIFTED being the rtx for the value to shift,
2643    and AMOUNT the amount to shift by.
2644    Store the result in the rtx TARGET, if that is convenient.
2645    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2646    Return the rtx for where the value is.  */
2647
2648 rtx
2649 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2650               poly_int64 amount, rtx target, int unsignedp)
2651 {
2652   return expand_shift_1 (code, mode, shifted,
2653                          gen_int_shift_amount (mode, amount),
2654                          target, unsignedp);
2655 }
2656
2657 /* Likewise, but return 0 if that cannot be done.  */
2658
2659 static rtx
2660 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2661                     int amount, rtx target, int unsignedp)
2662 {
2663   return expand_shift_1 (code, mode,
2664                          shifted, GEN_INT (amount), target, unsignedp, true);
2665 }
2666
2667 /* Output a shift instruction for expression code CODE,
2668    with SHIFTED being the rtx for the value to shift,
2669    and AMOUNT the tree for the amount to shift by.
2670    Store the result in the rtx TARGET, if that is convenient.
2671    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2672    Return the rtx for where the value is.  */
2673
2674 rtx
2675 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2676                        tree amount, rtx target, int unsignedp)
2677 {
2678   return expand_shift_1 (code, mode,
2679                          shifted, expand_normal (amount), target, unsignedp);
2680 }
2681
2682 \f
/* Forward declarations of static helpers, so that they can be referenced
   before their definitions.  */
static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
                        const struct mult_cost *, machine_mode mode);
static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
                              const struct algorithm *, enum mult_variant);
static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
static rtx extract_high_half (scalar_int_mode, rtx);
static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
                                       int, int);
/* Compute and return the best algorithm for multiplying by T.
   The algorithm must cost less than COST_LIMIT.
   If retval.cost >= COST_LIMIT, no algorithm was found and all
   other fields of the returned struct are undefined.
   MODE is the machine mode of the multiplication.  */
2697
2698 static void
2699 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2700             const struct mult_cost *cost_limit, machine_mode mode)
2701 {
2702   int m;
2703   struct algorithm *alg_in, *best_alg;
2704   struct mult_cost best_cost;
2705   struct mult_cost new_limit;
2706   int op_cost, op_latency;
2707   unsigned HOST_WIDE_INT orig_t = t;
2708   unsigned HOST_WIDE_INT q;
2709   int maxm, hash_index;
2710   bool cache_hit = false;
2711   enum alg_code cache_alg = alg_zero;
2712   bool speed = optimize_insn_for_speed_p ();
2713   scalar_int_mode imode;
2714   struct alg_hash_entry *entry_ptr;
2715
2716   /* Indicate that no algorithm is yet found.  If no algorithm
2717      is found, this value will be returned and indicate failure.  */
2718   alg_out->cost.cost = cost_limit->cost + 1;
2719   alg_out->cost.latency = cost_limit->latency + 1;
2720
2721   if (cost_limit->cost < 0
2722       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2723     return;
2724
2725   /* Be prepared for vector modes.  */
2726   imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2727
2728   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2729
2730   /* Restrict the bits of "t" to the multiplication's mode.  */
2731   t &= GET_MODE_MASK (imode);
2732
2733   /* t == 1 can be done in zero cost.  */
2734   if (t == 1)
2735     {
2736       alg_out->ops = 1;
2737       alg_out->cost.cost = 0;
2738       alg_out->cost.latency = 0;
2739       alg_out->op[0] = alg_m;
2740       return;
2741     }
2742
2743   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2744      fail now.  */
2745   if (t == 0)
2746     {
2747       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2748         return;
2749       else
2750         {
2751           alg_out->ops = 1;
2752           alg_out->cost.cost = zero_cost (speed);
2753           alg_out->cost.latency = zero_cost (speed);
2754           alg_out->op[0] = alg_zero;
2755           return;
2756         }
2757     }
2758
2759   /* We'll be needing a couple extra algorithm structures now.  */
2760
2761   alg_in = XALLOCA (struct algorithm);
2762   best_alg = XALLOCA (struct algorithm);
2763   best_cost = *cost_limit;
2764
2765   /* Compute the hash index.  */
2766   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2767
2768   /* See if we already know what to do for T.  */
2769   entry_ptr = alg_hash_entry_ptr (hash_index);
2770   if (entry_ptr->t == t
2771       && entry_ptr->mode == mode
2772       && entry_ptr->speed == speed
2773       && entry_ptr->alg != alg_unknown)
2774     {
2775       cache_alg = entry_ptr->alg;
2776
2777       if (cache_alg == alg_impossible)
2778         {
2779           /* The cache tells us that it's impossible to synthesize
2780              multiplication by T within entry_ptr->cost.  */
2781           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2782             /* COST_LIMIT is at least as restrictive as the one
2783                recorded in the hash table, in which case we have no
2784                hope of synthesizing a multiplication.  Just
2785                return.  */
2786             return;
2787
2788           /* If we get here, COST_LIMIT is less restrictive than the
2789              one recorded in the hash table, so we may be able to
2790              synthesize a multiplication.  Proceed as if we didn't
2791              have the cache entry.  */
2792         }
2793       else
2794         {
2795           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2796             /* The cached algorithm shows that this multiplication
2797                requires more cost than COST_LIMIT.  Just return.  This
2798                way, we don't clobber this cache entry with
2799                alg_impossible but retain useful information.  */
2800             return;
2801
2802           cache_hit = true;
2803
2804           switch (cache_alg)
2805             {
2806             case alg_shift:
2807               goto do_alg_shift;
2808
2809             case alg_add_t_m2:
2810             case alg_sub_t_m2:
2811               goto do_alg_addsub_t_m2;
2812
2813             case alg_add_factor:
2814             case alg_sub_factor:
2815               goto do_alg_addsub_factor;
2816
2817             case alg_add_t2_m:
2818               goto do_alg_add_t2_m;
2819
2820             case alg_sub_t2_m:
2821               goto do_alg_sub_t2_m;
2822
2823             default:
2824               gcc_unreachable ();
2825             }
2826         }
2827     }
2828
2829   /* If we have a group of zero bits at the low-order part of T, try
2830      multiplying by the remaining bits and then doing a shift.  */
2831
2832   if ((t & 1) == 0)
2833     {
2834     do_alg_shift:
2835       m = ctz_or_zero (t); /* m = number of low zero bits */
2836       if (m < maxm)
2837         {
2838           q = t >> m;
2839           /* The function expand_shift will choose between a shift and
2840              a sequence of additions, so the observed cost is given as
2841              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2842           op_cost = m * add_cost (speed, mode);
2843           if (shift_cost (speed, mode, m) < op_cost)
2844             op_cost = shift_cost (speed, mode, m);
2845           new_limit.cost = best_cost.cost - op_cost;
2846           new_limit.latency = best_cost.latency - op_cost;
2847           synth_mult (alg_in, q, &new_limit, mode);
2848
2849           alg_in->cost.cost += op_cost;
2850           alg_in->cost.latency += op_cost;
2851           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2852             {
2853               best_cost = alg_in->cost;
2854               std::swap (alg_in, best_alg);
2855               best_alg->log[best_alg->ops] = m;
2856               best_alg->op[best_alg->ops] = alg_shift;
2857             }
2858
2859           /* See if treating ORIG_T as a signed number yields a better
2860              sequence.  Try this sequence only for a negative ORIG_T
2861              as it would be useless for a non-negative ORIG_T.  */
2862           if ((HOST_WIDE_INT) orig_t < 0)
2863             {
2864               /* Shift ORIG_T as follows because a right shift of a
2865                  negative-valued signed type is implementation
2866                  defined.  */
2867               q = ~(~orig_t >> m);
2868               /* The function expand_shift will choose between a shift
2869                  and a sequence of additions, so the observed cost is
2870                  given as MIN (m * add_cost(speed, mode),
2871                  shift_cost(speed, mode, m)).  */
2872               op_cost = m * add_cost (speed, mode);
2873               if (shift_cost (speed, mode, m) < op_cost)
2874                 op_cost = shift_cost (speed, mode, m);
2875               new_limit.cost = best_cost.cost - op_cost;
2876               new_limit.latency = best_cost.latency - op_cost;
2877               synth_mult (alg_in, q, &new_limit, mode);
2878
2879               alg_in->cost.cost += op_cost;
2880               alg_in->cost.latency += op_cost;
2881               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2882                 {
2883                   best_cost = alg_in->cost;
2884                   std::swap (alg_in, best_alg);
2885                   best_alg->log[best_alg->ops] = m;
2886                   best_alg->op[best_alg->ops] = alg_shift;
2887                 }
2888             }
2889         }
2890       if (cache_hit)
2891         goto done;
2892     }
2893
2894   /* If we have an odd number, add or subtract one.  */
2895   if ((t & 1) != 0)
2896     {
2897       unsigned HOST_WIDE_INT w;
2898
2899     do_alg_addsub_t_m2:
2900       for (w = 1; (w & t) != 0; w <<= 1)
2901         ;
2902       /* If T was -1, then W will be zero after the loop.  This is another
2903          case where T ends with ...111.  Handling this with (T + 1) and
2904          subtract 1 produces slightly better code and results in algorithm
2905          selection much faster than treating it like the ...0111 case
2906          below.  */
2907       if (w == 0
2908           || (w > 2
2909               /* Reject the case where t is 3.
2910                  Thus we prefer addition in that case.  */
2911               && t != 3))
2912         {
2913           /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */
2914
2915           op_cost = add_cost (speed, mode);
2916           new_limit.cost = best_cost.cost - op_cost;
2917           new_limit.latency = best_cost.latency - op_cost;
2918           synth_mult (alg_in, t + 1, &new_limit, mode);
2919
2920           alg_in->cost.cost += op_cost;
2921           alg_in->cost.latency += op_cost;
2922           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2923             {
2924               best_cost = alg_in->cost;
2925               std::swap (alg_in, best_alg);
2926               best_alg->log[best_alg->ops] = 0;
2927               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2928             }
2929         }
2930       else
2931         {
2932           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */
2933
2934           op_cost = add_cost (speed, mode);
2935           new_limit.cost = best_cost.cost - op_cost;
2936           new_limit.latency = best_cost.latency - op_cost;
2937           synth_mult (alg_in, t - 1, &new_limit, mode);
2938
2939           alg_in->cost.cost += op_cost;
2940           alg_in->cost.latency += op_cost;
2941           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2942             {
2943               best_cost = alg_in->cost;
2944               std::swap (alg_in, best_alg);
2945               best_alg->log[best_alg->ops] = 0;
2946               best_alg->op[best_alg->ops] = alg_add_t_m2;
2947             }
2948         }
2949
2950       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2951          quickly with a - a * n for some appropriate constant n.  */
2952       m = exact_log2 (-orig_t + 1);
2953       if (m >= 0 && m < maxm)
2954         {
2955           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
          /* If the target has a cheap shift-and-subtract insn use
             that in preference to a shift insn followed by a sub insn.
             Assume that the shift-and-sub is "atomic" with a latency
             equal to its cost, otherwise assume that on superscalar
             hardware the shift may be executed concurrently with the
             earlier steps in the algorithm.  */
2962           if (shiftsub1_cost (speed, mode, m) <= op_cost)
2963             {
2964               op_cost = shiftsub1_cost (speed, mode, m);
2965               op_latency = op_cost;
2966             }
2967           else
2968             op_latency = add_cost (speed, mode);
2969
2970           new_limit.cost = best_cost.cost - op_cost;
2971           new_limit.latency = best_cost.latency - op_latency;
2972           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2973                       &new_limit, mode);
2974
2975           alg_in->cost.cost += op_cost;
2976           alg_in->cost.latency += op_latency;
2977           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2978             {
2979               best_cost = alg_in->cost;
2980               std::swap (alg_in, best_alg);
2981               best_alg->log[best_alg->ops] = m;
2982               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2983             }
2984         }
2985
2986       if (cache_hit)
2987         goto done;
2988     }
2989
2990   /* Look for factors of t of the form
2991      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2992      If we find such a factor, we can multiply by t using an algorithm that
2993      multiplies by q, shift the result by m and add/subtract it to itself.
2994
2995      We search for large factors first and loop down, even if large factors
2996      are less probable than small; if we find a large factor we will find a
2997      good sequence quickly, and therefore be able to prune (by decreasing
2998      COST_LIMIT) the search.  */
2999
3000  do_alg_addsub_factor:
3001   for (m = floor_log2 (t - 1); m >= 2; m--)
3002     {
3003       unsigned HOST_WIDE_INT d;
3004
3005       d = (HOST_WIDE_INT_1U << m) + 1;
3006       if (t % d == 0 && t > d && m < maxm
3007           && (!cache_hit || cache_alg == alg_add_factor))
3008         {
3009           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3010           if (shiftadd_cost (speed, mode, m) <= op_cost)
3011             op_cost = shiftadd_cost (speed, mode, m);
3012
3013           op_latency = op_cost;
3014
3015
3016           new_limit.cost = best_cost.cost - op_cost;
3017           new_limit.latency = best_cost.latency - op_latency;
3018           synth_mult (alg_in, t / d, &new_limit, mode);
3019
3020           alg_in->cost.cost += op_cost;
3021           alg_in->cost.latency += op_latency;
3022           if (alg_in->cost.latency < op_cost)
3023             alg_in->cost.latency = op_cost;
3024           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3025             {
3026               best_cost = alg_in->cost;
3027               std::swap (alg_in, best_alg);
3028               best_alg->log[best_alg->ops] = m;
3029               best_alg->op[best_alg->ops] = alg_add_factor;
3030             }
3031           /* Other factors will have been taken care of in the recursion.  */
3032           break;
3033         }
3034
3035       d = (HOST_WIDE_INT_1U << m) - 1;
3036       if (t % d == 0 && t > d && m < maxm
3037           && (!cache_hit || cache_alg == alg_sub_factor))
3038         {
3039           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3040           if (shiftsub0_cost (speed, mode, m) <= op_cost)
3041             op_cost = shiftsub0_cost (speed, mode, m);
3042
3043           op_latency = op_cost;
3044
3045           new_limit.cost = best_cost.cost - op_cost;
3046           new_limit.latency = best_cost.latency - op_latency;
3047           synth_mult (alg_in, t / d, &new_limit, mode);
3048
3049           alg_in->cost.cost += op_cost;
3050           alg_in->cost.latency += op_latency;
3051           if (alg_in->cost.latency < op_cost)
3052             alg_in->cost.latency = op_cost;
3053           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3054             {
3055               best_cost = alg_in->cost;
3056               std::swap (alg_in, best_alg);
3057               best_alg->log[best_alg->ops] = m;
3058               best_alg->op[best_alg->ops] = alg_sub_factor;
3059             }
3060           break;
3061         }
3062     }
3063   if (cache_hit)
3064     goto done;
3065
3066   /* Try shift-and-add (load effective address) instructions,
3067      i.e. do a*3, a*5, a*9.  */
3068   if ((t & 1) != 0)
3069     {
3070     do_alg_add_t2_m:
3071       q = t - 1;
3072       m = ctz_hwi (q);
3073       if (q && m < maxm)
3074         {
3075           op_cost = shiftadd_cost (speed, mode, m);
3076           new_limit.cost = best_cost.cost - op_cost;
3077           new_limit.latency = best_cost.latency - op_cost;
3078           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3079
3080           alg_in->cost.cost += op_cost;
3081           alg_in->cost.latency += op_cost;
3082           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3083             {
3084               best_cost = alg_in->cost;
3085               std::swap (alg_in, best_alg);
3086               best_alg->log[best_alg->ops] = m;
3087               best_alg->op[best_alg->ops] = alg_add_t2_m;
3088             }
3089         }
3090       if (cache_hit)
3091         goto done;
3092
3093     do_alg_sub_t2_m:
3094       q = t + 1;
3095       m = ctz_hwi (q);
3096       if (q && m < maxm)
3097         {
3098           op_cost = shiftsub0_cost (speed, mode, m);
3099           new_limit.cost = best_cost.cost - op_cost;
3100           new_limit.latency = best_cost.latency - op_cost;
3101           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3102
3103           alg_in->cost.cost += op_cost;
3104           alg_in->cost.latency += op_cost;
3105           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3106             {
3107               best_cost = alg_in->cost;
3108               std::swap (alg_in, best_alg);
3109               best_alg->log[best_alg->ops] = m;
3110               best_alg->op[best_alg->ops] = alg_sub_t2_m;
3111             }
3112         }
3113       if (cache_hit)
3114         goto done;
3115     }
3116
3117  done:
3118   /* If best_cost has not decreased, we have not found any algorithm.  */
3119   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3120     {
3121       /* We failed to find an algorithm.  Record alg_impossible for
3122          this case (that is, <T, MODE, COST_LIMIT>) so that next time
3123          we are asked to find an algorithm for T within the same or
3124          lower COST_LIMIT, we can immediately return to the
3125          caller.  */
3126       entry_ptr->t = t;
3127       entry_ptr->mode = mode;
3128       entry_ptr->speed = speed;
3129       entry_ptr->alg = alg_impossible;
3130       entry_ptr->cost = *cost_limit;
3131       return;
3132     }
3133
3134   /* Cache the result.  */
3135   if (!cache_hit)
3136     {
3137       entry_ptr->t = t;
3138       entry_ptr->mode = mode;
3139       entry_ptr->speed = speed;
3140       entry_ptr->alg = best_alg->op[best_alg->ops];
3141       entry_ptr->cost.cost = best_cost.cost;
3142       entry_ptr->cost.latency = best_cost.latency;
3143     }
3144
3145   /* If we are getting a too long sequence for `struct algorithm'
3146      to record, make this search fail.  */
3147   if (best_alg->ops == MAX_BITS_PER_WORD)
3148     return;
3149
3150   /* Copy the algorithm from temporary space to the space at alg_out.
3151      We avoid using structure assignment because the majority of
3152      best_alg is normally undefined, and this is a critical function.  */
3153   alg_out->ops = best_alg->ops + 1;
3154   alg_out->cost = best_cost;
3155   memcpy (alg_out->op, best_alg->op,
3156           alg_out->ops * sizeof *alg_out->op);
3157   memcpy (alg_out->log, best_alg->log,
3158           alg_out->ops * sizeof *alg_out->log);
3159 }
3160 \f
3161 /* Find the cheapest shift/add synthesis for multiplying a value of
3162    mode MODE by the constant VAL.  Three candidates are costed:
3163
3164        - the sequence synth_mult finds for VAL itself
3165        - the sequence for -VAL, followed by a negation
3166        - the sequence for VAL - 1, followed by an addition.
3167
3168    The cheapest one is described in *ALG and its final fixup in *VARIANT.
3169    Return true if it costs less than MULT_COST.  */
3170
3171 bool
3172 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3173                      struct algorithm *alg, enum mult_variant *variant,
3174                      int mult_cost)
3175 {
3176   bool speed = optimize_insn_for_speed_p ();
3177   struct algorithm candidate;
3178   struct mult_cost limit;
3179   int fixup_cost;
3180   int ceiling;
3181   bool found;
3182
3183   /* A negative budget can never be met.  */
3184   if (mult_cost < 0)
3185     return false;
3186
3187   /* Clamp MULT_COST to a sane upper bound: any constant multiplication
3188      can be performed with less than 2 * bits additions.  */
3189   ceiling = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3190   if (ceiling < mult_cost)
3191     mult_cost = ceiling;
3192
3193   /* Candidate 1: VAL itself, searched within the whole budget.  Until
3194      something cheaper turns up no final fixup is needed.  */
3195   limit.cost = mult_cost;
3196   limit.latency = mult_cost;
3197   *variant = basic_variant;
3198   synth_mult (alg, val, &limit, mode);
3199
3200   /* Candidate 2: -VAL plus a negation.  This works only if the
3201      inverted value actually fits in an `unsigned int'.  */
3202   if (GET_MODE_UNIT_BITSIZE (mode) <= HOST_BITS_PER_INT)
3203     {
3204       /* Search below the best cost so far (or below the budget if nothing
3205          has been found), leaving room for the negation itself.  */
3206       fixup_cost = neg_cost (speed, mode);
3207       found = MULT_COST_LESS (&alg->cost, mult_cost);
3208       limit.cost = (found ? alg->cost.cost : mult_cost) - fixup_cost;
3209       limit.latency = (found ? alg->cost.latency : mult_cost) - fixup_cost;
3210
3211       synth_mult (&candidate, -val, &limit, mode);
3212       candidate.cost.cost += fixup_cost;
3213       candidate.cost.latency += fixup_cost;
3214       if (CHEAPER_MULT_COST (&candidate.cost, &alg->cost))
3215         {
3216           *alg = candidate;
3217           *variant = negate_variant;
3218         }
3219     }
3220
3221   /* Candidate 3: VAL - 1 plus an addition, searched the same way.
3222      This proves very useful for division-by-constant.  */
3223   fixup_cost = add_cost (speed, mode);
3224   found = MULT_COST_LESS (&alg->cost, mult_cost);
3225   limit.cost = (found ? alg->cost.cost : mult_cost) - fixup_cost;
3226   limit.latency = (found ? alg->cost.latency : mult_cost) - fixup_cost;
3227
3228   synth_mult (&candidate, val - 1, &limit, mode);
3229   candidate.cost.cost += fixup_cost;
3230   candidate.cost.latency += fixup_cost;
3231   if (CHEAPER_MULT_COST (&candidate.cost, &alg->cost))
3232     {
3233       *alg = candidate;
3234       *variant = add_variant;
3235     }
3236
3237   /* *ALG now describes the best candidate; report whether it really
3238      fits the (clamped) budget.  */
3239   return MULT_COST_LESS (&alg->cost, mult_cost);
3240 }
3241
3242 /* A subroutine of expand_mult, used for constant multiplications.
3243    Multiply OP0 by VAL in mode MODE and return the rtx holding the product,
3244    storing it in TARGET if convenient.  Replay the shift/add steps recorded
3245    in ALG, then apply the final negate/add fixup selected by VARIANT.  */
3246
3247 static rtx
3248 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3249                    rtx target, const struct algorithm *alg,
3250                    enum mult_variant variant)
3251 {
3252   unsigned HOST_WIDE_INT val_so_far;  /* ACCUM == OP0 * VAL_SO_FAR.  */
3253   rtx_insn *insn;
3254   rtx accum, tem;
3255   int opno;
3256   machine_mode nmode;
3257
3258   /* Avoid referencing memory over and over and invalid sharing
3259      on SUBREGs.  */
3260   op0 = force_reg (mode, op0);
3261
3262   /* ACCUM starts out either as OP0 or as a zero, depending on
3263      the first operation; any other first operation is a bug.  */
3264
3265   if (alg->op[0] == alg_zero)
3266     {
3267       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3268       val_so_far = 0;
3269     }
3270   else if (alg->op[0] == alg_m)
3271     {
3272       accum = copy_to_mode_reg (mode, op0);
3273       val_so_far = 1;
3274     }
3275   else
3276     gcc_unreachable ();
3277
3278   for (opno = 1; opno < alg->ops; opno++)
3279     {
3280       int log = alg->log[opno];
3281       rtx shift_subtarget = optimize ? 0 : accum;  /* ACCUM if !optimize.  */
3282       rtx add_target
3283         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3284            && !optimize)
3285           ? target : 0;  /* TARGET only for the last step, and if !optimize.  */
3286       rtx accum_target = optimize ? 0 : accum;
3287       rtx accum_inner;
3288
3289       switch (alg->op[opno])
3290         {
3291         case alg_shift:  /* accum = accum << log.  */
3292           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3293           /* REG_EQUAL note will be attached to the following insn.  */
3294           emit_move_insn (accum, tem);
3295           val_so_far <<= log;
3296           break;
3297
3298         case alg_add_t_m2:  /* accum = accum + (op0 << log).  */
3299           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3300           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3301                                  add_target ? add_target : accum_target);
3302           val_so_far += HOST_WIDE_INT_1U << log;
3303           break;
3304
3305         case alg_sub_t_m2:  /* accum = accum - (op0 << log).  */
3306           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3307           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3308                                  add_target ? add_target : accum_target);
3309           val_so_far -= HOST_WIDE_INT_1U << log;
3310           break;
3311
3312         case alg_add_t2_m:  /* accum = (accum << log) + op0.  */
3313           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3314                                 log, shift_subtarget, 0);
3315           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3316                                  add_target ? add_target : accum_target);
3317           val_so_far = (val_so_far << log) + 1;
3318           break;
3319
3320         case alg_sub_t2_m:  /* accum = (accum << log) - op0.  */
3321           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3322                                 log, shift_subtarget, 0);
3323           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3324                                  add_target ? add_target : accum_target);
3325           val_so_far = (val_so_far << log) - 1;
3326           break;
3327
3328         case alg_add_factor:  /* accum = accum + (accum << log).  */
3329           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3330           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3331                                  add_target ? add_target : accum_target);
3332           val_so_far += val_so_far << log;
3333           break;
3334
3335         case alg_sub_factor:  /* accum = (accum << log) - accum.  */
3336           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3337           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3338                                  (add_target
3339                                   ? add_target : (optimize ? 0 : tem)));
3340           val_so_far = (val_so_far << log) - val_so_far;
3341           break;
3342
3343         default:
3344           gcc_unreachable ();
3345         }
3346
3347       if (SCALAR_INT_MODE_P (mode))
3348         {
3349           /* Write a REG_EQUAL note on the last insn so that we can cse
3350              multiplication sequences.  Note that if ACCUM is a SUBREG,
3351              we've set the inner register and must properly indicate that.  */
3352           tem = op0, nmode = mode;
3353           accum_inner = accum;
3354           if (GET_CODE (accum) == SUBREG)
3355             {
3356               accum_inner = SUBREG_REG (accum);
3357               nmode = GET_MODE (accum_inner);
3358               tem = gen_lowpart (nmode, op0);
3359             }
3360
3361           insn = get_last_insn ();
3362           set_dst_reg_note (insn, REG_EQUAL,
3363                             gen_rtx_MULT (nmode, tem,
3364                                           gen_int_mode (val_so_far, nmode)),
3365                             accum_inner);
3366         }
3367     }
3368
3369   if (variant == negate_variant)  /* ALG computed OP0 * -VAL.  */
3370     {
3371       val_so_far = -val_so_far;
3372       accum = expand_unop (mode, neg_optab, accum, target, 0);
3373     }
3374   else if (variant == add_variant)  /* ALG computed OP0 * (VAL - 1).  */
3375     {
3376       val_so_far = val_so_far + 1;
3377       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3378     }
3379
3380   /* Check that the steps really multiplied by VAL.  Compare only the bits
3381      significant in the result mode, to avoid extension confusion.  */
3382   nmode = GET_MODE_INNER (mode);
3383   val &= GET_MODE_MASK (nmode);
3384   val_so_far &= GET_MODE_MASK (nmode);
3385   gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3386
3387   return accum;
3388 }
3389
3390 /* Perform a multiplication and return an rtx for the result.
3391    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3392    TARGET is a suggestion for where to store the result (an rtx).
3393    If NO_LIBCALL, the final expand_binop uses OPTAB_WIDEN rather than
3394    OPTAB_LIB_WIDEN and the returned rtx may be null.
3395    We check specially for a constant integer as OP1; a CONSTANT_P OP0 is
3396    swapped with OP1 first, so the check also covers a constant OP0.  */
3397
3398 rtx
3399 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3400              int unsignedp, bool no_libcall)
3401 {
3402   enum mult_variant variant;
3403   struct algorithm algorithm;
3404   rtx scalar_op1;
3405   int max_cost;
3406   bool speed = optimize_insn_for_speed_p ();
3407   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3408
3409   if (CONSTANT_P (op0))
3410     std::swap (op0, op1);
3411
3412   /* For vectors, there are several simplifications that can be made if
3413      all elements of the vector constant are identical.  */
3414   scalar_op1 = unwrap_const_vec_duplicate (op1);
3415
3416   if (INTEGRAL_MODE_P (mode))
3417     {
3418       rtx fake_reg;  /* Stand-in operand, used only for costing.  */
3419       HOST_WIDE_INT coeff;
3420       bool is_neg;
3421       int mode_bitsize;
3422
3423       if (op1 == CONST0_RTX (mode))
3424         return op1;
3425       if (op1 == CONST1_RTX (mode))
3426         return op0;
3427       if (op1 == CONSTM1_RTX (mode))
3428         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3429                             op0, target, 0);
3430
3431       if (do_trapv)
3432         goto skip_synth;
3433
3434       /* If mode is integer vector mode, check if the backend supports
3435          vector lshift (by scalar or vector) at all.  If not, we can't use
3436          synthesized multiply.  */
3437       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3438           && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3439           && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3440         goto skip_synth;
3441
3442       /* These are the operations that are potentially turned into
3443          a sequence of shifts and additions.  */
3444       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3445
3446       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3447          less than or equal in size to `unsigned int' this doesn't matter.
3448          If the mode is larger than `unsigned int', then synth_mult works
3449          only if the constant value exactly fits in an `unsigned int' without
3450          any truncation.  This means that multiplying by negative values does
3451          not work; results are off by 2^32 on a 32 bit machine.  */
3452       if (CONST_INT_P (scalar_op1))
3453         {
3454           coeff = INTVAL (scalar_op1);
3455           is_neg = coeff < 0;
3456         }
3457 #if TARGET_SUPPORTS_WIDE_INT
3458       else if (CONST_WIDE_INT_P (scalar_op1))
3459 #else
3460       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3461 #endif
3462         {
3463           int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3464           /* Perfect power of 2 (other than 1, which is handled above).  */
3465           if (shift > 0)
3466             return expand_shift (LSHIFT_EXPR, mode, op0,
3467                                  shift, target, unsignedp);
3468           else
3469             goto skip_synth;
3470         }
3471       else
3472         goto skip_synth;
3473
3474       /* We used to test optimize here, on the grounds that it's better to
3475          produce a smaller program when -O is not used.  But this causes
3476          such a terrible slowdown sometimes that it seems better to always
3477          use synth_mult.  */
3478
3479       /* Special case powers of two, except negative COEFF in a wide mode.  */
3480       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3481           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3482         return expand_shift (LSHIFT_EXPR, mode, op0,
3483                              floor_log2 (coeff), target, unsignedp);
3484
3485       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3486
3487       /* Attempt to handle multiplication of DImode values by negative
3488          coefficients, by performing the multiplication by a positive
3489          multiplier and then inverting the result.  */
3490       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3491         {
3492           /* It's safe to use -coeff even for INT_MIN, as the
3493              result is interpreted as an unsigned coefficient.
3494              Exclude cost of op0 from max_cost to match the cost
3495              calculation of the synth_mult.  */
3496           coeff = -(unsigned HOST_WIDE_INT) coeff;
3497           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3498                                     mode, speed)
3499                       - neg_cost (speed, mode));
3500           if (max_cost <= 0)
3501             goto skip_synth;
3502
3503           /* Special case powers of two.  */
3504           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3505             {
3506               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3507                                        floor_log2 (coeff), target, unsignedp);
3508               return expand_unop (mode, neg_optab, temp, target, 0);
3509             }
3510
3511           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3512                                    max_cost))
3513             {
3514               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3515                                             &algorithm, variant);
3516               return expand_unop (mode, neg_optab, temp, target, 0);
3517             }
3518           goto skip_synth;
3519         }
3520
3521       /* Exclude cost of op0 from max_cost to match the cost
3522          calculation of the synth_mult.  */
3523       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3524       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3525         return expand_mult_const (mode, op0, coeff, target,
3526                                   &algorithm, variant);
3527     }
3528  skip_synth:
3529
3530   /* Expand x*2.0 as x+x.  */
3531   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3532       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3533     {
3534       op0 = force_reg (GET_MODE (op0), op0);
3535       return expand_binop (mode, add_optab, op0, op0,
3536                            target, unsignedp,
3537                            no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3538     }
3539
3540   /* This used to use umul_optab if unsigned, but for non-widening multiply
3541      there is no difference between signed and unsigned.  */
3542   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3543                       op0, op1, target, unsignedp,
3544                       no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3545   gcc_assert (op0 || no_libcall);  /* Null is tolerated only if NO_LIBCALL.  */
3546   return op0;
3547 }
3548
3549 /* Return a cost estimate for multiplying a register by the constant
3550    COEFF in the given MODE and SPEED.  */
3551
3552 int
3553 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3554 {
3555   struct algorithm seq;
3556   enum mult_variant fixup;
3557
3558   /* The cost of a generic register-by-register multiply is both the
3559      budget for the synthesis search and the fallback estimate.  */
3560   rtx pseudo = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3561   int budget = set_src_cost (gen_rtx_MULT (mode, pseudo, pseudo), mode, speed);
3562   if (!choose_mult_variant (mode, coeff, &seq, &fixup, budget))
3563     return budget;
3564
3565   return seq.cost.cost;
3566 }
3567
3568 /* Expand a widening multiplication of OP0 by OP1 and return an rtx for
3569    the MODE result.  TARGET is a suggestion for where to store the result.
3570    THIS_OPTAB is the optab to use; it must be either umul_widen_optab
3571    or smul_widen_optab.
3572
3573    A constant integer OP1 is checked for specially: zero and powers of
3574    two are handled directly, and otherwise a shift/add sequence is used
3575    when choose_mult_variant finds one costing less than a widening
3576    multiply.  */
3577
3578 rtx
3579 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3580                       int unsignedp, optab this_optab)
3581 {
3582   bool speed = optimize_insn_for_speed_p ();
3583   bool uns_widen = this_optab == umul_widen_optab;
3584   rtx wide_const;
3585
3586   /* The synthesis paths need OP1 to still be a CONST_INT once converted
3587      to MODE, either nonnegative or in a HWI-computable mode.  */
3588   if (CONST_INT_P (op1)
3589       && GET_MODE (op0) != VOIDmode
3590       && (wide_const = convert_modes (mode, GET_MODE (op0), op1, uns_widen))
3591       && CONST_INT_P (wide_const)
3592       && (INTVAL (wide_const) >= 0 || HWI_COMPUTABLE_MODE_P (mode)))
3593     {
3594       struct algorithm seq;
3595       enum mult_variant fixup;
3596       HOST_WIDE_INT coeff = INTVAL (wide_const);
3597
3598       if (coeff == 0)
3599         return CONST0_RTX (mode);
3600
3601       /* A power of two becomes a single left shift of the converted OP0.  */
3602       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3603         {
3604           rtx wide_op0 = convert_to_mode (mode, op0, uns_widen);
3605           return expand_shift (LSHIFT_EXPR, mode, wide_op0,
3606                                floor_log2 (coeff), target, unsignedp);
3607         }
3608
3609       /* Exclude cost of op0 from the budget to match the cost
3610          calculation of the synth_mult.  */
3611       int budget = mul_widen_cost (speed, mode);
3612       if (choose_mult_variant (mode, coeff, &seq, &fixup, budget))
3613         {
3614           rtx wide_op0 = convert_to_mode (mode, op0, uns_widen);
3615           return expand_mult_const (mode, wide_op0, coeff, target,
3616                                     &seq, fixup);
3617         }
3618     }
3619
3620   return expand_binop (mode, this_optab, op0, op1, target,
3621                        unsignedp, OPTAB_LIB_WIDEN);
3622 }
3623 \f
3624 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3625    replace division by D, and put the least significant N bits of the result
3626    in *MULTIPLIER_PTR and return the most significant bit (bit N).
3627
3628    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3629    needed precision is in PRECISION (should be <= N).
3630
3631    PRECISION should be as small as possible so this function can choose
3632    multiplier more freely.
3633
3634    The rounded-up logarithm of D is placed in *LGUP_PTR.  A shift count that
3635    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3636
3637    Using this function, x/D will be equal to (x * m) >> (N + *POST_SHIFT_PTR),
3638    where m is the full N + 1 bit multiplier.  */
3639
3640 unsigned HOST_WIDE_INT
3641 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3642                    unsigned HOST_WIDE_INT *multiplier_ptr,
3643                    int *post_shift_ptr, int *lgup_ptr)
3644 {
3645   int lgup, post_shift;
3646   int pow, pow2;
3647
3648   /* lgup = ceil(log2(divisor)); */
3649   lgup = ceil_log2 (d);
3650
3651   gcc_assert (lgup <= n);
3652
3653   pow = n + lgup;
3654   pow2 = n + lgup - precision;
3655
3656   /* mlow = 2^(N + lgup)/d */
3657   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3658   wide_int mlow = wi::udiv_trunc (val, d);
3659
3660   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3661   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3662   wide_int mhigh = wi::udiv_trunc (val, d);
3663
3664   /* If precision == N, then mlow, mhigh exceed 2^N
3665      (but they do not exceed 2^(N+1)).  */
3666
3667   /* Reduce to lowest terms: halve both bounds while they stay distinct.  */
3668   for (post_shift = lgup; post_shift > 0; post_shift--)
3669     {
3670       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3671                                                        HOST_BITS_PER_WIDE_INT);
3672       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3673                                                        HOST_BITS_PER_WIDE_INT);
3674       if (ml_lo >= mh_lo)
3675         break;
3676
3677       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3678       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3679     }
3680
3681   *post_shift_ptr = post_shift;
3682   *lgup_ptr = lgup;
3683   if (n < HOST_BITS_PER_WIDE_INT)
3684     {
3685       unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3686       *multiplier_ptr = mhigh.to_uhwi () & mask;
3687       return mhigh.to_uhwi () > mask;  /* Bit N; == MASK has bit N clear.  */
3688     }
3689   else
3690     {
3691       *multiplier_ptr = mhigh.to_uhwi ();
3692       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3693     }
3694 }
3695
3696 /* Return the inverse of X modulo 2**N, i.e. the Y for which X * Y is
3697    congruent to 1 (mod 2**N).  X is assumed to be odd.  */
3698
3699 static unsigned HOST_WIDE_INT
3700 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3701 {
3702   /* Mask selecting the low N bits.  N == HOST_BITS_PER_WIDE_INT gets the
3703      all-ones constant instead of a shift by N.  */
3704   unsigned HOST_WIDE_INT low_bits;
3705   if (n == HOST_BITS_PER_WIDE_INT)
3706     low_bits = HOST_WIDE_INT_M1U;
3707   else
3708     low_bits = (HOST_WIDE_INT_1U << n) - 1;
3709
3710   /* The starting guess Y = X already satisfies X*Y == 1 mod 2^3, since X
3711      is assumed odd.  Each iteration doubles the number of bits of
3712      significance in Y, so iterate until N bits are covered.  */
3713   unsigned HOST_WIDE_INT inv = x;
3714   for (int good_bits = 3; good_bits < n; good_bits *= 2)
3715     {
3716       unsigned HOST_WIDE_INT prod = x * inv;
3717
3718       inv = inv * (2 - prod) & low_bits;        /* Modulo 2^N */
3719     }
3720
3721   return inv;
3722 }
3723
3724 /* Emit code that converts the high half of a product computed with the
3725    wrong signedness into the high half for the wanted signedness.
3726    ADJ_OPERAND holds the high half of OP0 x OP1 as already computed.  A
3727    nonzero UNSIGNEDP turns a signed product into an unsigned one (the
3728    corrections are added); zero turns an unsigned product into a signed
3729    one (the corrections are subtracted).
3730
3731    MODE is the mode of operation.  The result is put in TARGET if that
3732    is convenient.  */
3733
3734 rtx
3735 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3736                              rtx op1, rtx target, int unsignedp)
3737 {
3738   enum rtx_code fix_code = unsignedp ? PLUS : MINUS;
3739   int sign_pos = GET_MODE_BITSIZE (mode) - 1;
3740
3741   /* First correction term: (OP0 >> (bitsize - 1)) & OP1.  */
3742   rtx sign0 = expand_shift (RSHIFT_EXPR, mode, op0, sign_pos, NULL_RTX, 0);
3743   rtx term0 = expand_and (mode, sign0, op1, NULL_RTX);
3744   rtx partial = force_operand (gen_rtx_fmt_ee (fix_code, mode,
3745                                                adj_operand, term0),
3746                                adj_operand);
3747
3748   /* Second correction term: (OP1 >> (bitsize - 1)) & OP0.  */
3749   rtx sign1 = expand_shift (RSHIFT_EXPR, mode, op1, sign_pos, NULL_RTX, 0);
3750   rtx term1 = expand_and (mode, sign1, op0, NULL_RTX);
3751
3752   /* Only the final combination is steered towards TARGET.  */
3753   return force_operand (gen_rtx_fmt_ee (fix_code, mode, partial, term1),
3754                         target);
3755 }
3756
3757 /* Helper for expmed_mult_highpart: return the MODE-sized high half of OP.  */
3758
3759 static rtx
3760 extract_high_half (scalar_int_mode mode, rtx op)
3761 {
3762   if (mode != word_mode)
3763     {
3764       scalar_int_mode wide = GET_MODE_WIDER_MODE (mode).require ();
3765       rtx shifted = expand_shift (RSHIFT_EXPR, wide, op,
3766                                   GET_MODE_BITSIZE (mode), 0, 1);
3767       return convert_modes (mode, wide, shifted, 0);
3768     }
3769   return gen_highpart (mode, op);
3770 }
3771
3772 /* Like expmed_mult_highpart, but only consider using a multiplication
3773    optab.  OP1 is an rtx for the constant operand.  */
3774
3775 static rtx
3776 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3777                             rtx target, int unsignedp, int max_cost)
3778 {
3779   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3780   optab moptab;
3781   rtx tem;
3782   int size;
3783   bool speed = optimize_insn_for_speed_p ();
3784
3785   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3786
3787   size = GET_MODE_BITSIZE (mode);
3788
3789   /* Firstly, try using a multiplication insn that only generates the needed
3790      high part of the product, and in the sign flavor of unsignedp.  */
3791   if (mul_highpart_cost (speed, mode) < max_cost)
3792     {
3793       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3794       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3795                           unsignedp, OPTAB_DIRECT);
3796       if (tem)
3797         return tem;
3798     }
3799
3800   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3801      Need to adjust the result after the multiplication.  */
3802   if (size - 1 < BITS_PER_WORD
3803       && (mul_highpart_cost (speed, mode)
3804           + 2 * shift_cost (speed, mode, size-1)
3805           + 4 * add_cost (speed, mode) < max_cost))
3806     {
3807       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3808       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3809                           unsignedp, OPTAB_DIRECT);
3810       if (tem)
3811         /* We used the wrong signedness.  Adjust the result.  */
3812         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3813                                             tem, unsignedp);
3814     }
3815
3816   /* Try widening multiplication.  */
3817   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3818   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3819       && mul_widen_cost (speed, wider_mode) < max_cost)
3820     {
3821       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3822                           unsignedp, OPTAB_WIDEN);
3823       if (tem)
3824         return extract_high_half (mode, tem);
3825     }
3826
3827   /* Try widening the mode and perform a non-widening multiplication.  */
3828   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3829       && size - 1 < BITS_PER_WORD
3830       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3831           < max_cost))
3832     {
3833       rtx_insn *insns;
3834       rtx wop0, wop1;
3835
3836       /* We need to widen the operands, for example to ensure the
3837          constant multiplier is correctly sign or zero extended.
3838          Use a sequence to clean-up any instructions emitted by
3839          the conversions if things don't work out.  */
3840       start_sequence ();
3841       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3842       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3843       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3844                           unsignedp, OPTAB_WIDEN);
3845       insns = get_insns ();
3846       end_sequence ();
3847
3848       if (tem)
3849         {
3850           emit_insn (insns);
3851           return extract_high_half (mode, tem);
3852         }
3853     }
3854
3855   /* Try widening multiplication of opposite signedness, and adjust.  */
3856   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3857   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3858       && size - 1 < BITS_PER_WORD
3859       && (mul_widen_cost (speed, wider_mode)
3860           + 2 * shift_cost (speed, mode, size-1)
3861           + 4 * add_cost (speed, mode) < max_cost))
3862     {
3863       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3864                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3865       if (tem != 0)
3866         {
3867           tem = extract_high_half (mode, tem);
3868           /* We used the wrong signedness.  Adjust the result.  */
3869           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3870                                               target, unsignedp);
3871         }
3872     }
3873
3874   return 0;
3875 }
3876
3877 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3878    putting the high half of the result in TARGET if that is convenient,
3879    and return where the result is.  If the operation can not be performed,
3880    0 is returned.
3881
3882    MODE is the mode of operation and result.
3883
3884    UNSIGNEDP nonzero means unsigned multiply.
3885
3886    MAX_COST is the total allowed cost for the expanded RTL.  */
3887
3888 static rtx
3889 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3890                       rtx target, int unsignedp, int max_cost)
3891 {
3892   unsigned HOST_WIDE_INT cnst1;
3893   int extra_cost;
3894   bool sign_adjust = false;
3895   enum mult_variant variant;
3896   struct algorithm alg;
3897   rtx tem;
3898   bool speed = optimize_insn_for_speed_p ();
3899
3900   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3901   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3902
3903   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3904
3905   /* We can't optimize modes wider than BITS_PER_WORD.
3906      ??? We might be able to perform double-word arithmetic if
3907      mode == word_mode, however all the cost calculations in
3908      synth_mult etc. assume single-word operations.  */
3909   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3910   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3911     return expmed_mult_highpart_optab (mode, op0, op1, target,
3912                                        unsignedp, max_cost);
3913
3914   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3915
3916   /* Check whether we try to multiply by a negative constant.  */
3917   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3918     {
3919       sign_adjust = true;
3920       extra_cost += add_cost (speed, mode);
3921     }
3922
3923   /* See whether shift/add multiplication is cheap enough.  */
3924   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3925                            max_cost - extra_cost))
3926     {
3927       /* See whether the specialized multiplication optabs are
3928          cheaper than the shift/add version.  */
3929       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3930                                         alg.cost.cost + extra_cost);
3931       if (tem)
3932         return tem;
3933
3934       tem = convert_to_mode (wider_mode, op0, unsignedp);
3935       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3936       tem = extract_high_half (mode, tem);
3937
3938       /* Adjust result for signedness.  */
3939       if (sign_adjust)
3940         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3941
3942       return tem;
3943     }
3944   return expmed_mult_highpart_optab (mode, op0, op1, target,
3945                                      unsignedp, max_cost);
3946 }
3947
3948
3949 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3950
3951 static rtx
3952 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3953 {
3954   rtx result, temp, shift;
3955   rtx_code_label *label;
3956   int logd;
3957   int prec = GET_MODE_PRECISION (mode);
3958
3959   logd = floor_log2 (d);
3960   result = gen_reg_rtx (mode);
3961
3962   /* Avoid conditional branches when they're expensive.  */
3963   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3964       && optimize_insn_for_speed_p ())
3965     {
3966       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3967                                       mode, 0, -1);
3968       if (signmask)
3969         {
3970           HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3971           signmask = force_reg (mode, signmask);
3972           shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
3973
3974           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3975              which instruction sequence to use.  If logical right shifts
3976              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3977              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3978
3979           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3980           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3981               || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3982                   > COSTS_N_INSNS (2)))
3983             {
3984               temp = expand_binop (mode, xor_optab, op0, signmask,
3985                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3986               temp = expand_binop (mode, sub_optab, temp, signmask,
3987                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3988               temp = expand_binop (mode, and_optab, temp,
3989                                    gen_int_mode (masklow, mode),
3990                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3991               temp = expand_binop (mode, xor_optab, temp, signmask,
3992                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3993               temp = expand_binop (mode, sub_optab, temp, signmask,
3994                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3995             }
3996           else
3997             {
3998               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3999                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
4000               signmask = force_reg (mode, signmask);
4001
4002               temp = expand_binop (mode, add_optab, op0, signmask,
4003                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4004               temp = expand_binop (mode, and_optab, temp,
4005                                    gen_int_mode (masklow, mode),
4006                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4007               temp = expand_binop (mode, sub_optab, temp, signmask,
4008                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4009             }
4010           return temp;
4011         }
4012     }
4013
4014   /* Mask contains the mode's signbit and the significant bits of the
4015      modulus.  By including the signbit in the operation, many targets
4016      can avoid an explicit compare operation in the following comparison
4017      against zero.  */
4018   wide_int mask = wi::mask (logd, false, prec);
4019   mask = wi::set_bit (mask, prec - 1);
4020
4021   temp = expand_binop (mode, and_optab, op0,
4022                        immed_wide_int_const (mask, mode),
4023                        result, 1, OPTAB_LIB_WIDEN);
4024   if (temp != result)
4025     emit_move_insn (result, temp);
4026
4027   label = gen_label_rtx ();
4028   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4029
4030   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4031                        0, OPTAB_LIB_WIDEN);
4032
4033   mask = wi::mask (logd, true, prec);
4034   temp = expand_binop (mode, ior_optab, temp,
4035                        immed_wide_int_const (mask, mode),
4036                        result, 1, OPTAB_LIB_WIDEN);
4037   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4038                        0, OPTAB_LIB_WIDEN);
4039   if (temp != result)
4040     emit_move_insn (result, temp);
4041   emit_label (label);
4042   return result;
4043 }
4044
4045 /* Expand signed division of OP0 by a power of two D in mode MODE.
4046    This routine is only called for positive values of D.  */
4047
4048 static rtx
4049 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4050 {
4051   rtx temp;
4052   rtx_code_label *label;
4053   int logd;
4054
4055   logd = floor_log2 (d);
4056
4057   if (d == 2
4058       && BRANCH_COST (optimize_insn_for_speed_p (),
4059                       false) >= 1)
4060     {
4061       temp = gen_reg_rtx (mode);
4062       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4063       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4064                            0, OPTAB_LIB_WIDEN);
4065       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4066     }
4067
4068   if (HAVE_conditional_move
4069       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4070     {
4071       rtx temp2;
4072
4073       start_sequence ();
4074       temp2 = copy_to_mode_reg (mode, op0);
4075       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4076                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
4077       temp = force_reg (mode, temp);
4078
4079       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
4080       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
4081                                      mode, temp, temp2, mode, 0);
4082       if (temp2)
4083         {
4084           rtx_insn *seq = get_insns ();
4085           end_sequence ();
4086           emit_insn (seq);
4087           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4088         }
4089       end_sequence ();
4090     }
4091
4092   if (BRANCH_COST (optimize_insn_for_speed_p (),
4093                    false) >= 2)
4094     {
4095       int ushift = GET_MODE_BITSIZE (mode) - logd;
4096
4097       temp = gen_reg_rtx (mode);
4098       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4099       if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4100           || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4101              > COSTS_N_INSNS (1))
4102         temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
4103                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
4104       else
4105         temp = expand_shift (RSHIFT_EXPR, mode, temp,
4106                              ushift, NULL_RTX, 1);
4107       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4108                            0, OPTAB_LIB_WIDEN);
4109       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4110     }
4111
4112   label = gen_label_rtx ();
4113   temp = copy_to_mode_reg (mode, op0);
4114   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4115   expand_inc (temp, gen_int_mode (d - 1, mode));
4116   emit_label (label);
4117   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4118 }
4119 \f
4120 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4121    if that is convenient, and returning where the result is.
4122    You may request either the quotient or the remainder as the result;
4123    specify REM_FLAG nonzero to get the remainder.
4124
4125    CODE is the expression code for which kind of division this is;
4126    it controls how rounding is done.  MODE is the machine mode to use.
4127    UNSIGNEDP nonzero means do unsigned division.  */
4128
4129 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4130    and then correct it by or'ing in missing high bits
4131    if result of ANDI is nonzero.
4132    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4133    This could optimize to a bfexts instruction.
4134    But C doesn't use these operations, so their optimizations are
4135    left for later.  */
4136 /* ??? For modulo, we don't actually need the highpart of the first product,
4137    the low part will do nicely.  And for small divisors, the second multiply
4138    can also be a low-part only multiply or even be completely left out.
4139    E.g. to calculate the remainder of a division by 3 with a 32 bit
4140    multiply, multiply with 0x55555556 and extract the upper two bits;
4141    the result is exact for inputs up to 0x1fffffff.
4142    The input range can be reduced by using cross-sum rules.
4143    For odd divisors >= 3, the following table gives right shift counts
4144    so that if a number is shifted by an integer multiple of the given
4145    amount, the remainder stays the same:
4146    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4147    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4148    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4149    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4150    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4151
4152    Cross-sum rules for even numbers can be derived by leaving as many bits
4153    to the right alone as the divisor has zeros to the right.
4154    E.g. if x is an unsigned 32 bit number:
4155    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4156    */
4157
4158 rtx
4159 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4160                rtx op0, rtx op1, rtx target, int unsignedp)
4161 {
4162   machine_mode compute_mode;
4163   rtx tquotient;
4164   rtx quotient = 0, remainder = 0;
4165   rtx_insn *last;
4166   rtx_insn *insn;
4167   optab optab1, optab2;
4168   int op1_is_constant, op1_is_pow2 = 0;
4169   int max_cost, extra_cost;
4170   static HOST_WIDE_INT last_div_const = 0;
4171   bool speed = optimize_insn_for_speed_p ();
4172
4173   op1_is_constant = CONST_INT_P (op1);
4174   if (op1_is_constant)
4175     {
4176       wide_int ext_op1 = rtx_mode_t (op1, mode);
4177       op1_is_pow2 = (wi::popcount (ext_op1) == 1
4178                      || (! unsignedp
4179                          && wi::popcount (wi::neg (ext_op1)) == 1));
4180     }
4181
4182   /*
4183      This is the structure of expand_divmod:
4184
4185      First comes code to fix up the operands so we can perform the operations
4186      correctly and efficiently.
4187
4188      Second comes a switch statement with code specific for each rounding mode.
4189      For some special operands this code emits all RTL for the desired
4190      operation, for other cases, it generates only a quotient and stores it in
4191      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4192      to indicate that it has not done anything.
4193
4194      Last comes code that finishes the operation.  If QUOTIENT is set and
4195      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4196      QUOTIENT is not set, it is computed using trunc rounding.
4197
4198      We try to generate special code for division and remainder when OP1 is a
4199      constant.  If |OP1| = 2**n we can use shifts and some other fast
4200      operations.  For other values of OP1, we compute a carefully selected
4201      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4202      by m.
4203
4204      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4205      half of the product.  Different strategies for generating the product are
4206      implemented in expmed_mult_highpart.
4207
4208      If what we actually want is the remainder, we generate that by another
4209      by-constant multiplication and a subtraction.  */
4210
4211   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4212      code below will malfunction if we are, so check here and handle
4213      the special case if so.  */
4214   if (op1 == const1_rtx)
4215     return rem_flag ? const0_rtx : op0;
4216
4217     /* When dividing by -1, we could get an overflow.
4218      negv_optab can handle overflows.  */
4219   if (! unsignedp && op1 == constm1_rtx)
4220     {
4221       if (rem_flag)
4222         return const0_rtx;
4223       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4224                           ? negv_optab : neg_optab, op0, target, 0);
4225     }
4226
4227   if (target
4228       /* Don't use the function value register as a target
4229          since we have to read it as well as write it,
4230          and function-inlining gets confused by this.  */
4231       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4232           /* Don't clobber an operand while doing a multi-step calculation.  */
4233           || ((rem_flag || op1_is_constant)
4234               && (reg_mentioned_p (target, op0)
4235                   || (MEM_P (op0) && MEM_P (target))))
4236           || reg_mentioned_p (target, op1)
4237           || (MEM_P (op1) && MEM_P (target))))
4238     target = 0;
4239
4240   /* Get the mode in which to perform this computation.  Normally it will
4241      be MODE, but sometimes we can't do the desired operation in MODE.
4242      If so, pick a wider mode in which we can do the operation.  Convert
4243      to that mode at the start to avoid repeated conversions.
4244
4245      First see what operations we need.  These depend on the expression
4246      we are evaluating.  (We assume that divxx3 insns exist under the
4247      same conditions that modxx3 insns and that these insns don't normally
4248      fail.  If these assumptions are not correct, we may generate less
4249      efficient code in some cases.)
4250
4251      Then see if we find a mode in which we can open-code that operation
4252      (either a division, modulus, or shift).  Finally, check for the smallest
4253      mode for which we can do the operation with a library call.  */
4254
4255   /* We might want to refine this now that we have division-by-constant
4256      optimization.  Since expmed_mult_highpart tries so many variants, it is
4257      not straightforward to generalize this.  Maybe we should make an array
4258      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4259
4260   optab1 = (op1_is_pow2
4261             ? (unsignedp ? lshr_optab : ashr_optab)
4262             : (unsignedp ? udiv_optab : sdiv_optab));
4263   optab2 = (op1_is_pow2 ? optab1
4264             : (unsignedp ? udivmod_optab : sdivmod_optab));
4265
4266   FOR_EACH_MODE_FROM (compute_mode, mode)
4267     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4268         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4269       break;
4270
4271   if (compute_mode == VOIDmode)
4272     FOR_EACH_MODE_FROM (compute_mode, mode)
4273       if (optab_libfunc (optab1, compute_mode)
4274           || optab_libfunc (optab2, compute_mode))
4275         break;
4276
4277   /* If we still couldn't find a mode, use MODE, but expand_binop will
4278      probably die.  */
4279   if (compute_mode == VOIDmode)
4280     compute_mode = mode;
4281
4282   if (target && GET_MODE (target) == compute_mode)
4283     tquotient = target;
4284   else
4285     tquotient = gen_reg_rtx (compute_mode);
4286
4287 #if 0
4288   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4289      (mode), and thereby get better code when OP1 is a constant.  Do that
4290      later.  It will require going over all usages of SIZE below.  */
4291   size = GET_MODE_BITSIZE (mode);
4292 #endif
4293
4294   /* Only deduct something for a REM if the last divide done was
4295      for a different constant.   Then set the constant of the last
4296      divide.  */
4297   max_cost = (unsignedp
4298               ? udiv_cost (speed, compute_mode)
4299               : sdiv_cost (speed, compute_mode));
4300   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4301                      && INTVAL (op1) == last_div_const))
4302     max_cost -= (mul_cost (speed, compute_mode)
4303                  + add_cost (speed, compute_mode));
4304
4305   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4306
4307   /* Now convert to the best mode to use.  */
4308   if (compute_mode != mode)
4309     {
4310       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4311       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4312
4313       /* convert_modes may have placed op1 into a register, so we
4314          must recompute the following.  */
4315       op1_is_constant = CONST_INT_P (op1);
4316       if (op1_is_constant)
4317         {
4318           wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4319           op1_is_pow2 = (wi::popcount (ext_op1) == 1
4320                          || (! unsignedp
4321                              && wi::popcount (wi::neg (ext_op1)) == 1));
4322         }
4323       else
4324         op1_is_pow2 = 0;
4325     }
4326
4327   /* If one of the operands is a volatile MEM, copy it into a register.  */
4328
4329   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4330     op0 = force_reg (compute_mode, op0);
4331   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4332     op1 = force_reg (compute_mode, op1);
4333
4334   /* If we need the remainder or if OP1 is constant, we need to
4335      put OP0 in a register in case it has any queued subexpressions.  */
4336   if (rem_flag || op1_is_constant)
4337     op0 = force_reg (compute_mode, op0);
4338
4339   last = get_last_insn ();
4340
4341   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4342   if (unsignedp)
4343     {
4344       if (code == FLOOR_DIV_EXPR)
4345         code = TRUNC_DIV_EXPR;
4346       if (code == FLOOR_MOD_EXPR)
4347         code = TRUNC_MOD_EXPR;
4348       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4349         code = TRUNC_DIV_EXPR;
4350     }
4351
4352   if (op1 != const0_rtx)
4353     switch (code)
4354       {
4355       case TRUNC_MOD_EXPR:
4356       case TRUNC_DIV_EXPR:
4357         if (op1_is_constant)
4358           {
4359             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4360             int size = GET_MODE_BITSIZE (int_mode);
4361             if (unsignedp)
4362               {
4363                 unsigned HOST_WIDE_INT mh, ml;
4364                 int pre_shift, post_shift;
4365                 int dummy;
4366                 wide_int wd = rtx_mode_t (op1, int_mode);
4367                 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4368
4369                 if (wi::popcount (wd) == 1)
4370                   {
4371                     pre_shift = floor_log2 (d);
4372                     if (rem_flag)
4373                       {
4374                         unsigned HOST_WIDE_INT mask
4375                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4376                         remainder
4377                           = expand_binop (int_mode, and_optab, op0,
4378                                           gen_int_mode (mask, int_mode),
4379                                           remainder, 1,
4380                                           OPTAB_LIB_WIDEN);
4381                         if (remainder)
4382                           return gen_lowpart (mode, remainder);
4383                       }
4384                     quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4385                                              pre_shift, tquotient, 1);
4386                   }
4387                 else if (size <= HOST_BITS_PER_WIDE_INT)
4388                   {
4389                     if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4390                       {
4391                         /* Most significant bit of divisor is set; emit an scc
4392                            insn.  */
4393                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4394                                                           int_mode, 1, 1);
4395                       }
4396                     else
4397                       {
4398                         /* Find a suitable multiplier and right shift count
4399                            instead of multiplying with D.  */
4400
4401                         mh = choose_multiplier (d, size, size,
4402                                                 &ml, &post_shift, &dummy);
4403
4404                         /* If the suggested multiplier is more than SIZE bits,
4405                            we can do better for even divisors, using an
4406                            initial right shift.  */
4407                         if (mh != 0 && (d & 1) == 0)
4408                           {
4409                             pre_shift = ctz_or_zero (d);
4410                             mh = choose_multiplier (d >> pre_shift, size,
4411                                                     size - pre_shift,
4412                                                     &ml, &post_shift, &dummy);
4413                             gcc_assert (!mh);
4414                           }
4415                         else
4416                           pre_shift = 0;
4417
4418                         if (mh != 0)
4419                           {
4420                             rtx t1, t2, t3, t4;
4421
4422                             if (post_shift - 1 >= BITS_PER_WORD)
4423                               goto fail1;
4424
4425                             extra_cost
4426                               = (shift_cost (speed, int_mode, post_shift - 1)
4427                                  + shift_cost (speed, int_mode, 1)
4428                                  + 2 * add_cost (speed, int_mode));
4429                             t1 = expmed_mult_highpart
4430                               (int_mode, op0, gen_int_mode (ml, int_mode),
4431                                NULL_RTX, 1, max_cost - extra_cost);
4432                             if (t1 == 0)
4433                               goto fail1;
4434                             t2 = force_operand (gen_rtx_MINUS (int_mode,
4435                                                                op0, t1),
4436                                                 NULL_RTX);
4437                             t3 = expand_shift (RSHIFT_EXPR, int_mode,
4438                                                t2, 1, NULL_RTX, 1);
4439                             t4 = force_operand (gen_rtx_PLUS (int_mode,
4440                                                               t1, t3),
4441                                                 NULL_RTX);
4442                             quotient = expand_shift
4443                               (RSHIFT_EXPR, int_mode, t4,
4444                                post_shift - 1, tquotient, 1);
4445                           }
4446                         else
4447                           {
4448                             rtx t1, t2;
4449
4450                             if (pre_shift >= BITS_PER_WORD
4451                                 || post_shift >= BITS_PER_WORD)
4452                               goto fail1;
4453
4454                             t1 = expand_shift
4455                               (RSHIFT_EXPR, int_mode, op0,
4456                                pre_shift, NULL_RTX, 1);
4457                             extra_cost
4458                               = (shift_cost (speed, int_mode, pre_shift)
4459                                  + shift_cost (speed, int_mode, post_shift));
4460                             t2 = expmed_mult_highpart
4461                               (int_mode, t1,
4462                                gen_int_mode (ml, int_mode),
4463                                NULL_RTX, 1, max_cost - extra_cost);
4464                             if (t2 == 0)
4465                               goto fail1;
4466                             quotient = expand_shift
4467                               (RSHIFT_EXPR, int_mode, t2,
4468                                post_shift, tquotient, 1);
4469                           }
4470                       }
4471                   }
4472                 else            /* Too wide mode to use tricky code */
4473                   break;
4474
4475                 insn = get_last_insn ();
4476                 if (insn != last)
4477                   set_dst_reg_note (insn, REG_EQUAL,
4478                                     gen_rtx_UDIV (int_mode, op0, op1),
4479                                     quotient);
4480               }
4481             else                /* TRUNC_DIV, signed */
4482               {
4483                 unsigned HOST_WIDE_INT ml;
4484                 int lgup, post_shift;
4485                 rtx mlr;
4486                 HOST_WIDE_INT d = INTVAL (op1);
4487                 unsigned HOST_WIDE_INT abs_d;
4488
4489                 /* Since d might be INT_MIN, we have to cast to
4490                    unsigned HOST_WIDE_INT before negating to avoid
4491                    undefined signed overflow.  */
4492                 abs_d = (d >= 0
4493                          ? (unsigned HOST_WIDE_INT) d
4494                          : - (unsigned HOST_WIDE_INT) d);
4495
4496                 /* n rem d = n rem -d */
4497                 if (rem_flag && d < 0)
4498                   {
4499                     d = abs_d;
4500                     op1 = gen_int_mode (abs_d, int_mode);
4501                   }
4502
4503                 if (d == 1)
4504                   quotient = op0;
4505                 else if (d == -1)
4506                   quotient = expand_unop (int_mode, neg_optab, op0,
4507                                           tquotient, 0);
4508                 else if (size <= HOST_BITS_PER_WIDE_INT
4509                          && abs_d == HOST_WIDE_INT_1U << (size - 1))
4510                   {
4511                     /* This case is not handled correctly below.  */
4512                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4513                                                 int_mode, 1, 1);
4514                     if (quotient == 0)
4515                       goto fail1;
4516                   }
4517                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4518                          && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4519                          && (rem_flag
4520                              ? smod_pow2_cheap (speed, int_mode)
4521                              : sdiv_pow2_cheap (speed, int_mode))
4522                          /* We assume that cheap metric is true if the
4523                             optab has an expander for this mode.  */
4524                          && ((optab_handler ((rem_flag ? smod_optab
4525                                               : sdiv_optab),
4526                                              int_mode)
4527                               != CODE_FOR_nothing)
4528                              || (optab_handler (sdivmod_optab, int_mode)
4529                                  != CODE_FOR_nothing)))
4530                   ;
4531                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)
4532                          && (size <= HOST_BITS_PER_WIDE_INT
4533                              || abs_d != (unsigned HOST_WIDE_INT) d))
4534                   {
4535                     if (rem_flag)
4536                       {
4537                         remainder = expand_smod_pow2 (int_mode, op0, d);
4538                         if (remainder)
4539                           return gen_lowpart (mode, remainder);
4540                       }
4541
4542                     if (sdiv_pow2_cheap (speed, int_mode)
4543                         && ((optab_handler (sdiv_optab, int_mode)
4544                              != CODE_FOR_nothing)
4545                             || (optab_handler (sdivmod_optab, int_mode)
4546                                 != CODE_FOR_nothing)))
4547                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4548                                                 int_mode, op0,
4549                                                 gen_int_mode (abs_d,
4550                                                               int_mode),
4551                                                 NULL_RTX, 0);
4552                     else
4553                       quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4554
4555                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4556                        negate the quotient.  */
4557                     if (d < 0)
4558                       {
4559                         insn = get_last_insn ();
4560                         if (insn != last
4561                             && abs_d < (HOST_WIDE_INT_1U
4562                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4563                           set_dst_reg_note (insn, REG_EQUAL,
4564                                             gen_rtx_DIV (int_mode, op0,
4565                                                          gen_int_mode
4566                                                            (abs_d,
4567                                                             int_mode)),
4568                                             quotient);
4569
4570                         quotient = expand_unop (int_mode, neg_optab,
4571                                                 quotient, quotient, 0);
4572                       }
4573                   }
4574                 else if (size <= HOST_BITS_PER_WIDE_INT)
4575                   {
4576                     choose_multiplier (abs_d, size, size - 1,
4577                                        &ml, &post_shift, &lgup);
4578                     if (ml < HOST_WIDE_INT_1U << (size - 1))
4579                       {
4580                         rtx t1, t2, t3;
4581
4582                         if (post_shift >= BITS_PER_WORD
4583                             || size - 1 >= BITS_PER_WORD)
4584                           goto fail1;
4585
4586                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4587                                       + shift_cost (speed, int_mode, size - 1)
4588                                       + add_cost (speed, int_mode));
4589                         t1 = expmed_mult_highpart
4590                           (int_mode, op0, gen_int_mode (ml, int_mode),
4591                            NULL_RTX, 0, max_cost - extra_cost);
4592                         if (t1 == 0)
4593                           goto fail1;
4594                         t2 = expand_shift
4595                           (RSHIFT_EXPR, int_mode, t1,
4596                            post_shift, NULL_RTX, 0);
4597                         t3 = expand_shift
4598                           (RSHIFT_EXPR, int_mode, op0,
4599                            size - 1, NULL_RTX, 0);
4600                         if (d < 0)
4601                           quotient
4602                             = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4603                                              tquotient);
4604                         else
4605                           quotient
4606                             = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4607                                              tquotient);
4608                       }
4609                     else
4610                       {
4611                         rtx t1, t2, t3, t4;
4612
4613                         if (post_shift >= BITS_PER_WORD
4614                             || size - 1 >= BITS_PER_WORD)
4615                           goto fail1;
4616
4617                         ml |= HOST_WIDE_INT_M1U << (size - 1);
4618                         mlr = gen_int_mode (ml, int_mode);
4619                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4620                                       + shift_cost (speed, int_mode, size - 1)
4621                                       + 2 * add_cost (speed, int_mode));
4622                         t1 = expmed_mult_highpart (int_mode, op0, mlr,
4623                                                    NULL_RTX, 0,
4624                                                    max_cost - extra_cost);
4625                         if (t1 == 0)
4626                           goto fail1;
4627                         t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4628                                             NULL_RTX);
4629                         t3 = expand_shift
4630                           (RSHIFT_EXPR, int_mode, t2,
4631                            post_shift, NULL_RTX, 0);
4632                         t4 = expand_shift
4633                           (RSHIFT_EXPR, int_mode, op0,
4634                            size - 1, NULL_RTX, 0);
4635                         if (d < 0)
4636                           quotient
4637                             = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4638                                              tquotient);
4639                         else
4640                           quotient
4641                             = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4642                                              tquotient);
4643                       }
4644                   }
4645                 else            /* Too wide mode to use tricky code */
4646                   break;
4647
4648                 insn = get_last_insn ();
4649                 if (insn != last)
4650                   set_dst_reg_note (insn, REG_EQUAL,
4651                                     gen_rtx_DIV (int_mode, op0, op1),
4652                                     quotient);
4653               }
4654             break;
4655           }
4656       fail1:
4657         delete_insns_since (last);
4658         break;
4659
4660       case FLOOR_DIV_EXPR:
4661       case FLOOR_MOD_EXPR:
4662       /* We will come here only for signed operations.  */
4663         if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4664           {
4665             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4666             int size = GET_MODE_BITSIZE (int_mode);
4667             unsigned HOST_WIDE_INT mh, ml;
4668             int pre_shift, lgup, post_shift;
4669             HOST_WIDE_INT d = INTVAL (op1);
4670
4671             if (d > 0)
4672               {
4673                 /* We could just as easily deal with negative constants here,
4674                    but it does not seem worth the trouble for GCC 2.6.  */
4675                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4676                   {
4677                     pre_shift = floor_log2 (d);
4678                     if (rem_flag)
4679                       {
4680                         unsigned HOST_WIDE_INT mask
4681                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4682                         remainder = expand_binop
4683                           (int_mode, and_optab, op0,
4684                            gen_int_mode (mask, int_mode),
4685                            remainder, 0, OPTAB_LIB_WIDEN);
4686                         if (remainder)
4687                           return gen_lowpart (mode, remainder);
4688                       }
4689                     quotient = expand_shift
4690                       (RSHIFT_EXPR, int_mode, op0,
4691                        pre_shift, tquotient, 0);
4692                   }
4693                 else
4694                   {
4695                     rtx t1, t2, t3, t4;
4696
4697                     mh = choose_multiplier (d, size, size - 1,
4698                                             &ml, &post_shift, &lgup);
4699                     gcc_assert (!mh);
4700
4701                     if (post_shift < BITS_PER_WORD
4702                         && size - 1 < BITS_PER_WORD)
4703                       {
4704                         t1 = expand_shift
4705                           (RSHIFT_EXPR, int_mode, op0,
4706                            size - 1, NULL_RTX, 0);
4707                         t2 = expand_binop (int_mode, xor_optab, op0, t1,
4708                                            NULL_RTX, 0, OPTAB_WIDEN);
4709                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4710                                       + shift_cost (speed, int_mode, size - 1)
4711                                       + 2 * add_cost (speed, int_mode));
4712                         t3 = expmed_mult_highpart
4713                           (int_mode, t2, gen_int_mode (ml, int_mode),
4714                            NULL_RTX, 1, max_cost - extra_cost);
4715                         if (t3 != 0)
4716                           {
4717                             t4 = expand_shift
4718                               (RSHIFT_EXPR, int_mode, t3,
4719                                post_shift, NULL_RTX, 1);
4720                             quotient = expand_binop (int_mode, xor_optab,
4721                                                      t4, t1, tquotient, 0,
4722                                                      OPTAB_WIDEN);
4723                           }
4724                       }
4725                   }
4726               }
4727             else
4728               {
4729                 rtx nsign, t1, t2, t3, t4;
4730                 t1 = force_operand (gen_rtx_PLUS (int_mode,
4731                                                   op0, constm1_rtx), NULL_RTX);
4732                 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4733                                    0, OPTAB_WIDEN);
4734                 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4735                                       size - 1, NULL_RTX, 0);
4736                 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4737                                     NULL_RTX);
4738                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4739                                     NULL_RTX, 0);
4740                 if (t4)
4741                   {
4742                     rtx t5;
4743                     t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4744                                       NULL_RTX, 0);
4745                     quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4746                                               tquotient);
4747                   }
4748               }
4749           }
4750
4751         if (quotient != 0)
4752           break;
4753         delete_insns_since (last);
4754
4755         /* Try using an instruction that produces both the quotient and
4756            remainder, using truncation.  We can easily compensate the quotient
4757            or remainder to get floor rounding, once we have the remainder.
4758            Notice that we compute also the final remainder value here,
4759            and return the result right away.  */
4760         if (target == 0 || GET_MODE (target) != compute_mode)
4761           target = gen_reg_rtx (compute_mode);
4762
4763         if (rem_flag)
4764           {
4765             remainder
4766               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4767             quotient = gen_reg_rtx (compute_mode);
4768           }
4769         else
4770           {
4771             quotient
4772               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4773             remainder = gen_reg_rtx (compute_mode);
4774           }
4775
4776         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4777                                  quotient, remainder, 0))
4778           {
4779             /* This could be computed with a branch-less sequence.
4780                Save that for later.  */
4781             rtx tem;
4782             rtx_code_label *label = gen_label_rtx ();
4783             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4784             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4785                                 NULL_RTX, 0, OPTAB_WIDEN);
4786             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4787             expand_dec (quotient, const1_rtx);
4788             expand_inc (remainder, op1);
4789             emit_label (label);
4790             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4791           }
4792
4793         /* No luck with division elimination or divmod.  Have to do it
4794            by conditionally adjusting op0 *and* the result.  */
4795         {
4796           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4797           rtx adjusted_op0;
4798           rtx tem;
4799
4800           quotient = gen_reg_rtx (compute_mode);
4801           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4802           label1 = gen_label_rtx ();
4803           label2 = gen_label_rtx ();
4804           label3 = gen_label_rtx ();
4805           label4 = gen_label_rtx ();
4806           label5 = gen_label_rtx ();
4807           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4808           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4809           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4810                               quotient, 0, OPTAB_LIB_WIDEN);
4811           if (tem != quotient)
4812             emit_move_insn (quotient, tem);
4813           emit_jump_insn (targetm.gen_jump (label5));
4814           emit_barrier ();
4815           emit_label (label1);
4816           expand_inc (adjusted_op0, const1_rtx);
4817           emit_jump_insn (targetm.gen_jump (label4));
4818           emit_barrier ();
4819           emit_label (label2);
4820           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4821           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4822                               quotient, 0, OPTAB_LIB_WIDEN);
4823           if (tem != quotient)
4824             emit_move_insn (quotient, tem);
4825           emit_jump_insn (targetm.gen_jump (label5));
4826           emit_barrier ();
4827           emit_label (label3);
4828           expand_dec (adjusted_op0, const1_rtx);
4829           emit_label (label4);
4830           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4831                               quotient, 0, OPTAB_LIB_WIDEN);
4832           if (tem != quotient)
4833             emit_move_insn (quotient, tem);
4834           expand_dec (quotient, const1_rtx);
4835           emit_label (label5);
4836         }
4837         break;
4838
4839       case CEIL_DIV_EXPR:
4840       case CEIL_MOD_EXPR:
4841         if (unsignedp)
4842           {
4843             if (op1_is_constant
4844                 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4845                 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4846                     || INTVAL (op1) >= 0))
4847               {
4848                 scalar_int_mode int_mode
4849                   = as_a <scalar_int_mode> (compute_mode);
4850                 rtx t1, t2, t3;
4851                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4852                 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4853                                    floor_log2 (d), tquotient, 1);
4854                 t2 = expand_binop (int_mode, and_optab, op0,
4855                                    gen_int_mode (d - 1, int_mode),
4856                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4857                 t3 = gen_reg_rtx (int_mode);
4858                 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4859                 if (t3 == 0)
4860                   {
4861                     rtx_code_label *lab;
4862                     lab = gen_label_rtx ();
4863                     do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4864                     expand_inc (t1, const1_rtx);
4865                     emit_label (lab);
4866                     quotient = t1;
4867                   }
4868                 else
4869                   quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4870                                             tquotient);
4871                 break;
4872               }
4873
4874             /* Try using an instruction that produces both the quotient and
4875                remainder, using truncation.  We can easily compensate the
4876                quotient or remainder to get ceiling rounding, once we have the
4877                remainder.  Notice that we compute also the final remainder
4878                value here, and return the result right away.  */
4879             if (target == 0 || GET_MODE (target) != compute_mode)
4880               target = gen_reg_rtx (compute_mode);
4881
4882             if (rem_flag)
4883               {
4884                 remainder = (REG_P (target)
4885                              ? target : gen_reg_rtx (compute_mode));
4886                 quotient = gen_reg_rtx (compute_mode);
4887               }
4888             else
4889               {
4890                 quotient = (REG_P (target)
4891                             ? target : gen_reg_rtx (compute_mode));
4892                 remainder = gen_reg_rtx (compute_mode);
4893               }
4894
4895             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4896                                      remainder, 1))
4897               {
4898                 /* This could be computed with a branch-less sequence.
4899                    Save that for later.  */
4900                 rtx_code_label *label = gen_label_rtx ();
4901                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4902                                  compute_mode, label);
4903                 expand_inc (quotient, const1_rtx);
4904                 expand_dec (remainder, op1);
4905                 emit_label (label);
4906                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4907               }
4908
4909             /* No luck with division elimination or divmod.  Have to do it
4910                by conditionally adjusting op0 *and* the result.  */
4911             {
4912               rtx_code_label *label1, *label2;
4913               rtx adjusted_op0, tem;
4914
4915               quotient = gen_reg_rtx (compute_mode);
4916               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4917               label1 = gen_label_rtx ();
4918               label2 = gen_label_rtx ();
4919               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4920                                compute_mode, label1);
4921               emit_move_insn  (quotient, const0_rtx);
4922               emit_jump_insn (targetm.gen_jump (label2));
4923               emit_barrier ();
4924               emit_label (label1);
4925               expand_dec (adjusted_op0, const1_rtx);
4926               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4927                                   quotient, 1, OPTAB_LIB_WIDEN);
4928               if (tem != quotient)
4929                 emit_move_insn (quotient, tem);
4930               expand_inc (quotient, const1_rtx);
4931               emit_label (label2);
4932             }
4933           }
4934         else /* signed */
4935           {
4936             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4937                 && INTVAL (op1) >= 0)
4938               {
4939                 /* This is extremely similar to the code for the unsigned case
4940                    above.  For 2.7 we should merge these variants, but for
4941                    2.6.1 I don't want to touch the code for unsigned since that
4942                    gets used in C.  The signed case will only be used by other
4943                    languages (Ada).  */
4944
4945                 rtx t1, t2, t3;
4946                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4947                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4948                                    floor_log2 (d), tquotient, 0);
4949                 t2 = expand_binop (compute_mode, and_optab, op0,
4950                                    gen_int_mode (d - 1, compute_mode),
4951                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4952                 t3 = gen_reg_rtx (compute_mode);
4953                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4954                                       compute_mode, 1, 1);
4955                 if (t3 == 0)
4956                   {
4957                     rtx_code_label *lab;
4958                     lab = gen_label_rtx ();
4959                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4960                     expand_inc (t1, const1_rtx);
4961                     emit_label (lab);
4962                     quotient = t1;
4963                   }
4964                 else
4965                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4966                                                           t1, t3),
4967                                             tquotient);
4968                 break;
4969               }
4970
4971             /* Try using an instruction that produces both the quotient and
4972                remainder, using truncation.  We can easily compensate the
4973                quotient or remainder to get ceiling rounding, once we have the
4974                remainder.  Notice that we compute also the final remainder
4975                value here, and return the result right away.  */
4976             if (target == 0 || GET_MODE (target) != compute_mode)
4977               target = gen_reg_rtx (compute_mode);
4978             if (rem_flag)
4979               {
4980                 remainder= (REG_P (target)
4981                             ? target : gen_reg_rtx (compute_mode));
4982                 quotient = gen_reg_rtx (compute_mode);
4983               }
4984             else
4985               {
4986                 quotient = (REG_P (target)
4987                             ? target : gen_reg_rtx (compute_mode));
4988                 remainder = gen_reg_rtx (compute_mode);
4989               }
4990
4991             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4992                                      remainder, 0))
4993               {
4994                 /* This could be computed with a branch-less sequence.
4995                    Save that for later.  */
4996                 rtx tem;
4997                 rtx_code_label *label = gen_label_rtx ();
4998                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4999                                  compute_mode, label);
5000                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
5001                                     NULL_RTX, 0, OPTAB_WIDEN);
5002                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5003                 expand_inc (quotient, const1_rtx);
5004                 expand_dec (remainder, op1);
5005                 emit_label (label);
5006                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5007               }
5008
5009             /* No luck with division elimination or divmod.  Have to do it
5010                by conditionally adjusting op0 *and* the result.  */
5011             {
5012               rtx_code_label *label1, *label2, *label3, *label4, *label5;
5013               rtx adjusted_op0;
5014               rtx tem;
5015
5016               quotient = gen_reg_rtx (compute_mode);
5017               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5018               label1 = gen_label_rtx ();
5019               label2 = gen_label_rtx ();
5020               label3 = gen_label_rtx ();
5021               label4 = gen_label_rtx ();
5022               label5 = gen_label_rtx ();
5023               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5024               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5025                                compute_mode, label1);
5026               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5027                                   quotient, 0, OPTAB_LIB_WIDEN);
5028               if (tem != quotient)
5029                 emit_move_insn (quotient, tem);
5030               emit_jump_insn (targetm.gen_jump (label5));
5031               emit_barrier ();
5032               emit_label (label1);
5033               expand_dec (adjusted_op0, const1_rtx);
5034               emit_jump_insn (targetm.gen_jump (label4));
5035               emit_barrier ();
5036               emit_label (label2);
5037               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5038                                compute_mode, label3);
5039               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5040                                   quotient, 0, OPTAB_LIB_WIDEN);
5041               if (tem != quotient)
5042                 emit_move_insn (quotient, tem);
5043               emit_jump_insn (targetm.gen_jump (label5));
5044               emit_barrier ();
5045               emit_label (label3);
5046               expand_inc (adjusted_op0, const1_rtx);
5047               emit_label (label4);
5048               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5049                                   quotient, 0, OPTAB_LIB_WIDEN);
5050               if (tem != quotient)
5051                 emit_move_insn (quotient, tem);
5052               expand_inc (quotient, const1_rtx);
5053               emit_label (label5);
5054             }
5055           }
5056         break;
5057
5058       case EXACT_DIV_EXPR:
5059         if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5060           {
5061             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5062             int size = GET_MODE_BITSIZE (int_mode);
5063             HOST_WIDE_INT d = INTVAL (op1);
5064             unsigned HOST_WIDE_INT ml;
5065             int pre_shift;
5066             rtx t1;
5067
5068             pre_shift = ctz_or_zero (d);
5069             ml = invert_mod2n (d >> pre_shift, size);
5070             t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5071                                pre_shift, NULL_RTX, unsignedp);
5072             quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5073                                     NULL_RTX, 1);
5074
5075             insn = get_last_insn ();
5076             set_dst_reg_note (insn, REG_EQUAL,
5077                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5078                                               int_mode, op0, op1),
5079                               quotient);
5080           }
5081         break;
5082
5083       case ROUND_DIV_EXPR:
5084       case ROUND_MOD_EXPR:
5085         if (unsignedp)
5086           {
5087             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5088             rtx tem;
5089             rtx_code_label *label;
5090             label = gen_label_rtx ();
5091             quotient = gen_reg_rtx (int_mode);
5092             remainder = gen_reg_rtx (int_mode);
5093             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5094               {
5095                 rtx tem;
5096                 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5097                                          quotient, 1, OPTAB_LIB_WIDEN);
5098                 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5099                 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5100                                           remainder, 1, OPTAB_LIB_WIDEN);
5101               }
5102             tem = plus_constant (int_mode, op1, -1);
5103             tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5104             do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5105             expand_inc (quotient, const1_rtx);
5106             expand_dec (remainder, op1);
5107             emit_label (label);
5108           }
5109         else
5110           {
5111             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5112             int size = GET_MODE_BITSIZE (int_mode);
5113             rtx abs_rem, abs_op1, tem, mask;
5114             rtx_code_label *label;
5115             label = gen_label_rtx ();
5116             quotient = gen_reg_rtx (int_mode);
5117             remainder = gen_reg_rtx (int_mode);
5118             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5119               {
5120                 rtx tem;
5121                 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5122                                          quotient, 0, OPTAB_LIB_WIDEN);
5123                 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5124                 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5125                                           remainder, 0, OPTAB_LIB_WIDEN);
5126               }
5127             abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5128             abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5129             tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5130                                 1, NULL_RTX, 1);
5131             do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5132             tem = expand_binop (int_mode, xor_optab, op0, op1,
5133                                 NULL_RTX, 0, OPTAB_WIDEN);
5134             mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5135                                  size - 1, NULL_RTX, 0);
5136             tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5137                                 NULL_RTX, 0, OPTAB_WIDEN);
5138             tem = expand_binop (int_mode, sub_optab, tem, mask,
5139                                 NULL_RTX, 0, OPTAB_WIDEN);
5140             expand_inc (quotient, tem);
5141             tem = expand_binop (int_mode, xor_optab, mask, op1,
5142                                 NULL_RTX, 0, OPTAB_WIDEN);
5143             tem = expand_binop (int_mode, sub_optab, tem, mask,
5144                                 NULL_RTX, 0, OPTAB_WIDEN);
5145             expand_dec (remainder, tem);
5146             emit_label (label);
5147           }
5148         return gen_lowpart (mode, rem_flag ? remainder : quotient);
5149
5150       default:
5151         gcc_unreachable ();
5152       }
5153
5154   if (quotient == 0)
5155     {
5156       if (target && GET_MODE (target) != compute_mode)
5157         target = 0;
5158
5159       if (rem_flag)
5160         {
5161           /* Try to produce the remainder without producing the quotient.
5162              If we seem to have a divmod pattern that does not require widening,
5163              don't try widening here.  We should really have a WIDEN argument
5164              to expand_twoval_binop, since what we'd really like to do here is
5165              1) try a mod insn in compute_mode
5166              2) try a divmod insn in compute_mode
5167              3) try a div insn in compute_mode and multiply-subtract to get
5168                 remainder
5169              4) try the same things with widening allowed.  */
5170           remainder
5171             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5172                                  op0, op1, target,
5173                                  unsignedp,
5174                                  ((optab_handler (optab2, compute_mode)
5175                                    != CODE_FOR_nothing)
5176                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
5177           if (remainder == 0)
5178             {
5179               /* No luck there.  Can we do remainder and divide at once
5180                  without a library call?  */
5181               remainder = gen_reg_rtx (compute_mode);
5182               if (! expand_twoval_binop ((unsignedp
5183                                           ? udivmod_optab
5184                                           : sdivmod_optab),
5185                                          op0, op1,
5186                                          NULL_RTX, remainder, unsignedp))
5187                 remainder = 0;
5188             }
5189
5190           if (remainder)
5191             return gen_lowpart (mode, remainder);
5192         }
5193
5194       /* Produce the quotient.  Try a quotient insn, but not a library call.
5195          If we have a divmod in this mode, use it in preference to widening
5196          the div (for this test we assume it will not fail). Note that optab2
5197          is set to the one of the two optabs that the call below will use.  */
5198       quotient
5199         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5200                              op0, op1, rem_flag ? NULL_RTX : target,
5201                              unsignedp,
5202                              ((optab_handler (optab2, compute_mode)
5203                                != CODE_FOR_nothing)
5204                               ? OPTAB_DIRECT : OPTAB_WIDEN));
5205
5206       if (quotient == 0)
5207         {
5208           /* No luck there.  Try a quotient-and-remainder insn,
5209              keeping the quotient alone.  */
5210           quotient = gen_reg_rtx (compute_mode);
5211           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5212                                      op0, op1,
5213                                      quotient, NULL_RTX, unsignedp))
5214             {
5215               quotient = 0;
5216               if (! rem_flag)
5217                 /* Still no luck.  If we are not computing the remainder,
5218                    use a library call for the quotient.  */
5219                 quotient = sign_expand_binop (compute_mode,
5220                                               udiv_optab, sdiv_optab,
5221                                               op0, op1, target,
5222                                               unsignedp, OPTAB_LIB_WIDEN);
5223             }
5224         }
5225     }
5226
5227   if (rem_flag)
5228     {
5229       if (target && GET_MODE (target) != compute_mode)
5230         target = 0;
5231
5232       if (quotient == 0)
5233         {
5234           /* No divide instruction either.  Use library for remainder.  */
5235           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5236                                          op0, op1, target,
5237                                          unsignedp, OPTAB_LIB_WIDEN);
5238           /* No remainder function.  Try a quotient-and-remainder
5239              function, keeping the remainder.  */
5240           if (!remainder)
5241             {
5242               remainder = gen_reg_rtx (compute_mode);
5243               if (!expand_twoval_binop_libfunc
5244                   (unsignedp ? udivmod_optab : sdivmod_optab,
5245                    op0, op1,
5246                    NULL_RTX, remainder,
5247                    unsignedp ? UMOD : MOD))
5248                 remainder = NULL_RTX;
5249             }
5250         }
5251       else
5252         {
5253           /* We divided.  Now finish doing X - Y * (X / Y).  */
5254           remainder = expand_mult (compute_mode, quotient, op1,
5255                                    NULL_RTX, unsignedp);
5256           remainder = expand_binop (compute_mode, sub_optab, op0,
5257                                     remainder, target, unsignedp,
5258                                     OPTAB_LIB_WIDEN);
5259         }
5260     }
5261
5262   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5263 }
5264 \f
5265 /* Return a tree node with data type TYPE, describing the value of X.
5266    Usually this is a VAR_DECL, if there is no obvious better choice.
5267    X may be an expression, however we only support those expressions
5268    generated by loop.c.

        Integer, floating-point, vector and polynomial-integer constants are
        turned into constant trees of type TYPE.  PLUS, MINUS, NEG, MULT,
        ASHIFT, LSHIFTRT, ASHIFTRT, DIV, UDIV, SIGN_EXTEND, ZERO_EXTEND and
        CONST are rebuilt recursively from their operands.  A SYMBOL_REF
        that has a decl becomes the address of that decl, converted to TYPE.
        Anything else is wrapped in an anonymous VAR_DECL of type TYPE whose
        RTL is X.  */
5269
5270 tree
5271 make_tree (tree type, rtx x)
5272 {
5273   tree t;
5274
5275   switch (GET_CODE (x))
5276     {
5277     case CONST_INT:
5278     case CONST_WIDE_INT:
5279       t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5280       return t;
5281
5282     case CONST_DOUBLE:
5283       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
           /* Without wide-int support, a VOIDmode CONST_DOUBLE is read as a
              two-element integer array; otherwise its real value is used.  */
5284       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5285         t = wide_int_to_tree (type,
5286                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5287                                                     HOST_BITS_PER_WIDE_INT * 2));
5288       else
5289         t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5290
5291       return t;
5292
5293     case CONST_VECTOR:
5294       {
5295         unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5296         unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5297         tree itype = TREE_TYPE (type);
5298
5299         /* Build a tree with vector elements.  */
5300         tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5301         unsigned int count = elts.encoded_nelts ();
5302         for (unsigned int i = 0; i < count; ++i)
5303           {
5304             rtx elt = CONST_VECTOR_ELT (x, i);
5305             elts.quick_push (make_tree (itype, elt));
5306           }
5307
5308         return elts.build ();
5309       }
5310
5311     case PLUS:
5312       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5313                           make_tree (type, XEXP (x, 1)));
5314
5315     case MINUS:
5316       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5317                           make_tree (type, XEXP (x, 1)));
5318
5319     case NEG:
5320       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5321
5322     case MULT:
5323       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5324                           make_tree (type, XEXP (x, 1)));
5325
5326     case ASHIFT:
5327       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5328                           make_tree (type, XEXP (x, 1)));
5329
5330     case LSHIFTRT:
           /* Shift in the unsigned variant of TYPE, then convert back.  */
5331       t = unsigned_type_for (type);
5332       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5333                                          make_tree (t, XEXP (x, 0)),
5334                                          make_tree (type, XEXP (x, 1))));
5335
5336     case ASHIFTRT:
           /* Shift in the signed variant of TYPE, then convert back.  */
5337       t = signed_type_for (type);
5338       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5339                                          make_tree (t, XEXP (x, 0)),
5340                                          make_tree (type, XEXP (x, 1))));
5341
5342     case DIV:
           /* Divide in the signed variant of TYPE unless TYPE is a
              REAL_TYPE, in which case TYPE itself is used.  */
5343       if (TREE_CODE (type) != REAL_TYPE)
5344         t = signed_type_for (type);
5345       else
5346         t = type;
5347
5348       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5349                                          make_tree (t, XEXP (x, 0)),
5350                                          make_tree (t, XEXP (x, 1))));
5351     case UDIV:
5352       t = unsigned_type_for (type);
5353       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5354                                          make_tree (t, XEXP (x, 0)),
5355                                          make_tree (t, XEXP (x, 1))));
5356
5357     case SIGN_EXTEND:
5358     case ZERO_EXTEND:
           /* Describe the operand in a type taken from its own mode, unsigned
              for ZERO_EXTEND, and let the conversion to TYPE extend it.  */
5359       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5360                                           GET_CODE (x) == ZERO_EXTEND);
5361       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5362
5363     case CONST:
5364       {
5365         rtx op = XEXP (x, 0);
5366         if (GET_CODE (op) == VEC_DUPLICATE)
5367           {
5368             tree elt_tree = make_tree (TREE_TYPE (type), XEXP (op, 0));
5369             return build_vector_from_val (type, elt_tree);
5370           }
5371         if (GET_CODE (op) == VEC_SERIES)
5372           {
5373             tree itype = TREE_TYPE (type);
5374             tree base_tree = make_tree (itype, XEXP (op, 0));
5375             tree step_tree = make_tree (itype, XEXP (op, 1));
5376             return build_vec_series (type, base_tree, step_tree);
5377           }
5378         return make_tree (type, op);
5379       }
5380
5381     case SYMBOL_REF:
5382       t = SYMBOL_REF_DECL (x);
5383       if (t)
5384         return fold_convert (type, build_fold_addr_expr (t));
5385       /* fall through.  */
5386
5387     default:
           /* Build the constant in TYPE.  T must not be used here: it is
              either unassigned or null on every path reaching this point.  */
5388       if (CONST_POLY_INT_P (x))
5389         return wide_int_to_tree (type, const_poly_int_value (x));
5390
5391       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5392
5393       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5394          address mode to pointer mode.  */
5395       if (POINTER_TYPE_P (type))
5396         x = convert_memory_address_addr_space
5397           (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5398
5399       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5400          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5401       t->decl_with_rtl.rtl = x;
5402
5403       return t;
5404     }
5405 }
5406 \f
5407 /* Emit RTL computing OP0 & OP1 in MODE and return the rtx holding the
5408    result.  When TARGET is nonzero the value ends up in TARGET, which is
5409    then returned; when TARGET is 0 the returned rtx is a pseudo-register
5410    or a constant.  */
5411
5412 rtx
5413 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5414 {
5415   /* Folding is only attempted when both operands are modeless.  */
5416   bool modeless_p = GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode;
5417   rtx result = (modeless_p
5418                 ? simplify_binary_operation (AND, mode, op0, op1) : NULL_RTX);
5419   if (!result)
5420     result = expand_binop (mode, and_optab, op0, op1, target, 0,
5421                            OPTAB_LIB_WIDEN);
5422   if (!target)
5423     return result;
5424   if (result != target)
5425     emit_move_insn (target, result);
5426   return target;
5427 }
5428
5429 /* Helper function for emit_store_flag.

        Try to emit the cstore pattern ICODE for the comparison CODE of X
        and Y and return an rtx holding the result, or NULL_RTX on failure.
        X and Y are first passed through prepare_operand together with MODE,
        COMPARE_MODE and UNSIGNEDP.  If either operand cannot be prepared,
        or the pattern cannot be expanded, every insn emitted since entry is
        deleted and NULL_RTX is returned.

        The pattern produces its value in the mode targetm.cstore_mode
        (ICODE).  TARGET_MODE is the mode wanted for the final result;
        VOIDmode means the pattern's own result mode.  TARGET, if nonnull,
        receives the result whenever a mode conversion is performed; if it
        is null a new pseudo of the wanted mode is created for that purpose.

        NORMALIZEP says how to normalize the raw value: 0 or STORE_FLAG_VALUE
        leaves it alone, -STORE_FLAG_VALUE negates it, and otherwise it is
        reduced to 0/1 or 0/-1 with a shift of the sign bit or a mask of the
        low bit.  */
5430 rtx
5431 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5432              machine_mode mode, machine_mode compare_mode,
5433              int unsignedp, rtx x, rtx y, int normalizep,
5434              machine_mode target_mode)
5435 {
5436   struct expand_operand ops[4];
5437   rtx op0, comparison, subtarget;
5438   rtx_insn *last;
5439   scalar_int_mode result_mode = targetm.cstore_mode (icode);
5440   scalar_int_mode int_target_mode;
5441
       /* Remember where we are in the insn stream so that a failed attempt
          can be undone.  */
5442   last = get_last_insn ();
5443   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5444   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5445   if (!x || !y)
5446     {
5447       delete_insns_since (last);
5448       return NULL_RTX;
5449     }
5450
5451   if (target_mode == VOIDmode)
5452     int_target_mode = result_mode;
5453   else
5454     int_target_mode = as_a <scalar_int_mode> (target_mode);
5455   if (!target)
5456     target = gen_reg_rtx (int_target_mode);
5457
5458   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5459
       /* Operand 0 is the output (no preferred location is suggested when
          optimizing), operand 1 the comparison rtx itself, and operands 2
          and 3 its two arguments.  */
5460   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5461   create_fixed_operand (&ops[1], comparison);
5462   create_fixed_operand (&ops[2], x);
5463   create_fixed_operand (&ops[3], y);
5464   if (!maybe_expand_insn (icode, 4, ops))
5465     {
5466       delete_insns_since (last);
5467       return NULL_RTX;
5468     }
5469   subtarget = ops[0].value;
5470
5471   /* If we are converting to a wider mode, first convert to
5472      INT_TARGET_MODE, then normalize.  This produces better combining
5473      opportunities on machines that have a SIGN_EXTRACT when we are
5474      testing a single bit.  This mostly benefits the 68k.
5475
5476      If STORE_FLAG_VALUE does not have the sign bit set when
5477      interpreted in MODE, we can do this conversion as unsigned, which
5478      is usually more efficient.  */
5479   if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (result_mode))
5480     {
5481       convert_move (target, subtarget,
5482                     val_signbit_known_clear_p (result_mode,
5483                                                STORE_FLAG_VALUE));
5484       op0 = target;
5485       result_mode = int_target_mode;
5486     }
5487   else
5488     op0 = subtarget;
5489
5490   /* If we want to keep subexpressions around, don't reuse our last
5491      target.  */
5492   if (optimize)
5493     subtarget = 0;
5494
5495   /* Now normalize to the proper value in MODE.  Sometimes we don't
5496      have to do anything.  */
5497   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5498     ;
5499   /* STORE_FLAG_VALUE might be the most negative number, so write
5500      the comparison this way to avoid a compiler-time warning.  */
5501   else if (- normalizep == STORE_FLAG_VALUE)
5502     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5503
5504   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5505      it hard to use a value of just the sign bit due to ANSI integer
5506      constant typing rules.  */
5507   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5508     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5509                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5510                         normalizep == 1);
5511   else
5512     {
           /* Otherwise the low bit of STORE_FLAG_VALUE must be set: mask it
              out to get 0/1 and negate that when 0/-1 is wanted.  */
5513       gcc_assert (STORE_FLAG_VALUE & 1);
5514
5515       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5516       if (normalizep == -1)
5517         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5518     }
5519
5520   /* If we were converting to a smaller mode, do the conversion now.  */
5521   if (int_target_mode != result_mode)
5522     {
5523       convert_move (target, op0, 0);
5524       return target;
5525     }
5526   else
5527     return op0;
5528 }
5529
5530
5531 /* A subroutine of emit_store_flag only including "tricks" that do not
5532    need a recursive call.  These are kept separate to avoid infinite
5533    loops.

        TARGET, CODE, OP0, OP1, MODE, UNSIGNEDP and NORMALIZEP are as for
        emit_store_flag; if MODE is VOIDmode the mode of OP0 is used.
        TARGET_MODE is the mode wanted for the result, or VOIDmode.

        Three techniques are tried in order:
        - a double-word integer compared for (in)equality with 0 or -1, or
          sign-tested against 0, is reduced to a word_mode comparison;
        - A < 0 and A >= 0 are computed by shifting the sign bit of A (or of
          its one's complement) down to the low-order bit;
        - a cstore pattern is emitted through emit_cstore.

        Returns an rtx holding the result, or 0 if none of these applies.

        NOTE(review): the double-word case below does call emit_store_flag,
        but only on word_mode operands.  */
5534
5535 static rtx
5536 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5537                    machine_mode mode, int unsignedp, int normalizep,
5538                    machine_mode target_mode)
5539 {
5540   rtx subtarget;
5541   enum insn_code icode;
5542   machine_mode compare_mode;
5543   enum mode_class mclass;
5544   enum rtx_code scode;
5545
5546   if (unsignedp)
5547     code = unsigned_condition (code);
       /* SCODE is derived from CODE before any operand swap below; it is
          only used for the retry with exchanged operands at the end.  */
5548   scode = swap_condition (code);
5549
5550   /* If one operand is constant, make it the second one.  Only do this
5551      if the other operand is not constant as well.  */
5552
5553   if (swap_commutative_operands_p (op0, op1))
5554     {
5555       std::swap (op0, op1);
5556       code = swap_condition (code);
5557     }
5558
5559   if (mode == VOIDmode)
5560     mode = GET_MODE (op0);
5561
5562   /* For some comparisons with 1 and -1, we can convert this to
5563      comparisons with zero.  This will often produce more opportunities for
5564      store-flag insns.  */
5565
5566   switch (code)
5567     {
5568     case LT:
5569       if (op1 == const1_rtx)
5570         op1 = const0_rtx, code = LE;
5571       break;
5572     case LE:
5573       if (op1 == constm1_rtx)
5574         op1 = const0_rtx, code = LT;
5575       break;
5576     case GE:
5577       if (op1 == const1_rtx)
5578         op1 = const0_rtx, code = GT;
5579       break;
5580     case GT:
5581       if (op1 == constm1_rtx)
5582         op1 = const0_rtx, code = GE;
5583       break;
5584     case GEU:
5585       if (op1 == const1_rtx)
5586         op1 = const0_rtx, code = NE;
5587       break;
5588     case LTU:
5589       if (op1 == const1_rtx)
5590         op1 = const0_rtx, code = EQ;
5591       break;
5592     default:
5593       break;
5594     }
5595
5596   /* If we are comparing a double-word integer with zero or -1, we can
5597      convert the comparison into one involving a single word.  */
5598   scalar_int_mode int_mode;
5599   if (is_int_mode (mode, &int_mode)
5600       && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5601       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5602     {
5603       rtx tem;
5604       if ((code == EQ || code == NE)
5605           && (op1 == const0_rtx || op1 == constm1_rtx))
5606         {
5607           rtx op00, op01;
5608
5609           /* Do a logical OR or AND of the two words and compare the
5610              result.  */
5611           op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5612           op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
               /* IOR when comparing against zero, AND when comparing
                  against -1.  */
5613           tem = expand_binop (word_mode,
5614                               op1 == const0_rtx ? ior_optab : and_optab,
5615                               op00, op01, NULL_RTX, unsignedp,
5616                               OPTAB_DIRECT);
5617
5618           if (tem != 0)
5619             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5620                                    unsignedp, normalizep);
5621         }
5622       else if ((code == LT || code == GE) && op1 == const0_rtx)
5623         {
5624           rtx op0h;
5625
5626           /* If testing the sign bit, can just test on high word.  */
5627           op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5628                                       subreg_highpart_offset (word_mode,
5629                                                               int_mode));
5630           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5631                                  unsignedp, normalizep);
5632         }
5633       else
5634         tem = NULL_RTX;
5635
5636       if (tem)
5637         {
               /* The word-mode result is returned as is when it already has
                  the wanted mode; otherwise it is converted into TARGET.  */
5638           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5639             return tem;
5640           if (!target)
5641             target = gen_reg_rtx (target_mode);
5642
5643           convert_move (target, tem,
5644                         !val_signbit_known_set_p (word_mode,
5645                                                   (normalizep ? normalizep
5646                                                    : STORE_FLAG_VALUE)));
5647           return target;
5648         }
5649     }
5650
5651   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5652      complement of A (for GE) and shifting the sign bit to the low bit.  */
5653   if (op1 == const0_rtx && (code == LT || code == GE)
5654       && is_int_mode (mode, &int_mode)
5655       && (normalizep || STORE_FLAG_VALUE == 1
5656           || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5657     {
5658       scalar_int_mode int_target_mode;
5659       subtarget = target;
5660
5661       if (!target)
5662         int_target_mode = int_mode;
5663       else
5664         {
5665           /* If the result is to be wider than OP0, it is best to convert it
5666              first.  If it is to be narrower, it is *incorrect* to convert it
5667              first.  */
5668           int_target_mode = as_a <scalar_int_mode> (target_mode);
5669           if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5670             {
5671               op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5672               int_mode = int_target_mode;
5673             }
5674         }
5675
           /* TARGET cannot serve as a scratch location when the operation is
              carried out in a mode other than the target mode.  */
5676       if (int_target_mode != int_mode)
5677         subtarget = 0;
5678
5679       if (code == GE)
5680         op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5681                            ((STORE_FLAG_VALUE == 1 || normalizep)
5682                             ? 0 : subtarget), 0);
5683
5684       if (STORE_FLAG_VALUE == 1 || normalizep)
5685         /* If we are supposed to produce a 0/1 value, we want to do
5686            a logical shift from the sign bit to the low-order bit; for
5687            a -1/0 value, we do an arithmetic shift.  */
5688         op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5689                             GET_MODE_BITSIZE (int_mode) - 1,
5690                             subtarget, normalizep != -1);
5691
5692       if (int_mode != int_target_mode)
5693         op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5694
5695       return op0;
5696     }
5697
       /* Look for a cstore pattern, starting at MODE.  Only the first mode
          that has one is tried: the loop is left after that attempt
          whether or not it succeeded.  */
5698   mclass = GET_MODE_CLASS (mode);
5699   FOR_EACH_MODE_FROM (compare_mode, mode)
5700     {
5701      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5702      icode = optab_handler (cstore_optab, optab_mode);
5703      if (icode != CODE_FOR_nothing)
5704         {
5705           do_pending_stack_adjust ();
5706           rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5707                                  unsignedp, op0, op1, normalizep, target_mode);
5708           if (tem)
5709             return tem;
5710
               /* For floating-point modes, retry with the operands
                  exchanged and the swapped condition.  */
5711           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5712             {
5713               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5714                                  unsignedp, op1, op0, normalizep, target_mode);
5715               if (tem)
5716                 return tem;
5717             }
5718           break;
5719         }
5720     }
5721
5722   return 0;
5723 }
5724
5725 /* Subroutine of emit_store_flag that handles cases in which the operands
5726    are scalar integers.  SUBTARGET is the target to use for temporary
5727    operations and TRUEVAL is the value to store when the condition is
5728    true.  All other arguments are as for emit_store_flag.

        Returns an rtx holding the result, or 0 if none of the techniques
        below applies.  Insns emitted by an attempt that did not produce a
        result are deleted again.  */
5729
5730 rtx
5731 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5732                      rtx op1, scalar_int_mode mode, int unsignedp,
5733                      int normalizep, rtx trueval)
5734 {
5735   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5736   rtx_insn *last = get_last_insn ();
5737
5738   /* If this is an equality comparison of integers, we can try to exclusive-or
5739      (or subtract) the two operands and use a recursive call to try the
5740      comparison with zero.  Don't do any of these cases if branches are
5741      very cheap.  */
5742
5743   if ((code == EQ || code == NE) && op1 != const0_rtx)
5744     {
5745       rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5746                               OPTAB_WIDEN);
5747
5748       if (tem == 0)
5749         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5750                             OPTAB_WIDEN);
5751       if (tem != 0)
5752         tem = emit_store_flag (target, code, tem, const0_rtx,
5753                                mode, unsignedp, normalizep);
5754       if (tem != 0)
5755         return tem;
5756
5757       delete_insns_since (last);
5758     }
5759
5760   /* For integer comparisons, try the reverse comparison.  However, for
5761      small X and if we'd have anyway to extend, implementing "X != 0"
5762      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5763   rtx_code rcode = reverse_condition (code);
5764   if (can_compare_p (rcode, mode, ccp_store_flag)
5765       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5766             && code == NE
5767             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5768             && op1 == const0_rtx))
5769     {
           /* WANT_ADD is set when STORE_FLAG_VALUE and NORMALIZEP are 1 and
              -1 in either order.  Adding NORMALIZEP to the raw result of
              the reversed comparison then yields NORMALIZEP when the
              original condition holds and zero when it does not.  */
5770       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5771                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5772
5773       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5774       if (want_add
5775           && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5776                        optimize_insn_for_speed_p ()) == 0)
5777         {
5778           rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5779                                        STORE_FLAG_VALUE, target_mode);
5780           if (tem != 0)
5781             tem = expand_binop (target_mode, add_optab, tem,
5782                                 gen_int_mode (normalizep, target_mode),
5783                                 target, 0, OPTAB_WIDEN);
5784           if (tem != 0)
5785             return tem;
5786         }
5787       else if (!want_add
5788                && rtx_cost (trueval, mode, XOR, 1,
5789                             optimize_insn_for_speed_p ()) == 0)
5790         {
               /* Compute the reversed comparison in the requested
                  normalization and flip it by XORing with TRUEVAL.  */
5791           rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5792                                        normalizep, target_mode);
5793           if (tem != 0)
5794             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5795                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5796           if (tem != 0)
5797             return tem;
5798         }
5799
5800       delete_insns_since (last);
5801     }
5802
5803   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5804      the constant zero.  Reject all other comparisons at this point.  Only
5805      do LE and GT if branches are expensive since they are expensive on
5806      2-operand machines.  */
5807
5808   if (op1 != const0_rtx
5809       || (code != EQ && code != NE
5810           && (BRANCH_COST (optimize_insn_for_speed_p (),
5811                            false) <= 1 || (code != LE && code != GT))))
5812     return 0;
5813
5814   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5815      do the necessary operation below.  */
5816
5817   rtx tem = 0;
5818
5819   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5820      the sign bit set.  */
5821
5822   if (code == LE)
5823     {
5824       /* This is destructive, so SUBTARGET can't be OP0.  */
5825       if (rtx_equal_p (subtarget, op0))
5826         subtarget = 0;
5827
5828       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5829                           OPTAB_WIDEN);
5830       if (tem)
5831         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5832                             OPTAB_WIDEN);
5833     }
5834
5835   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5836      number of bits in the mode of OP0, minus one.  A > 0 iff that result
          has the sign bit set.  */
5837
5838   if (code == GT)
5839     {
5840       if (rtx_equal_p (subtarget, op0))
5841         subtarget = 0;
5842
5843       tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5844                                 GET_MODE_BITSIZE (mode) - 1,
5845                                 subtarget, 0);
5846       if (tem)
5847         tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5848                             OPTAB_WIDEN);
5849     }
5850
5851   if (code == EQ || code == NE)
5852     {
5853       /* For EQ or NE, one way to do the comparison is to apply an operation
5854          that converts the operand into a positive number if it is nonzero
5855          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5856          for NE we negate.  This puts the result in the sign bit.  Then we
5857          normalize with a shift, if needed.
5858
5859          Two operations that can do the above actions are ABS and FFS, so try
5860          them.  If that doesn't work, and MODE is smaller than a full word,
5861          we can use zero-extension to the wider mode (an unsigned conversion)
5862          as the operation.  */
5863
5864       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5865          that is compensated by the subsequent overflow when subtracting
5866          one / negating.  */
5867
5868       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5869         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5870       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5871         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5872       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5873         {
               /* From here on the computation is carried out in word_mode.  */
5874           tem = convert_modes (word_mode, mode, op0, 1);
5875           mode = word_mode;
5876         }
5877
5878       if (tem != 0)
5879         {
5880           if (code == EQ)
5881             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5882                                 0, OPTAB_WIDEN);
5883           else
5884             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5885         }
5886
5887       /* If we couldn't do it that way, for NE we can "or" the two's complement
5888          of the value with itself.  For EQ, we take the one's complement of
5889          that "or", which is an extra insn, so we only handle EQ if branches
5890          are expensive.  */
5891
5892       if (tem == 0
5893           && (code == NE
5894               || BRANCH_COST (optimize_insn_for_speed_p (),
5895                               false) > 1))
5896         {
5897           if (rtx_equal_p (subtarget, op0))
5898             subtarget = 0;
5899
5900           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5901           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5902                               OPTAB_WIDEN);
5903
5904           if (tem && code == EQ)
5905             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5906         }
5907     }
5908
       /* The answer is now in the sign bit of TEM.  When a normalized value
          is wanted, shift it down to the low-order bit: a logical shift
          gives 0/1, an arithmetic shift gives 0/-1.  */
5909   if (tem && normalizep)
5910     tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5911                               GET_MODE_BITSIZE (mode) - 1,
5912                               subtarget, normalizep == 1);
5913
5914   if (tem)
5915     {
           /* With no TARGET, hand back TEM as is.  Otherwise convert TEM
              into TARGET if the modes differ, or copy it there if SUBTARGET
              was dropped along the way.  */
5916       if (!target)
5917         ;
5918       else if (GET_MODE (tem) != target_mode)
5919         {
5920           convert_move (target, tem, 0);
5921           tem = target;
5922         }
5923       else if (!subtarget)
5924         {
5925           emit_move_insn (target, tem);
5926           tem = target;
5927         }
5928     }
5929   else
5930     delete_insns_since (last);
5931
5932   return tem;
5933 }
5934
5935 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5936    and storing in TARGET.  Normally return TARGET.
5937    Return 0 if that cannot be done.
5938
5939    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5940    it is VOIDmode, they cannot both be CONST_INT.
5941
5942    UNSIGNEDP is for the case where we have to widen the operands
5943    to perform the operation.  It says to use zero-extension.
5944
5945    NORMALIZEP is 1 if we should convert the result to be either zero
5946    or one.  Normalize is -1 if we should convert the result to be
5947    either zero or -1.  If NORMALIZEP is zero, the result will be left
5948    "raw" out of the scc insn.  */
5949
5950 rtx
5951 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5952                  machine_mode mode, int unsignedp, int normalizep)
5953 {
5954   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5955   enum rtx_code rcode;
5956   rtx subtarget;
5957   rtx tem, trueval;
5958   rtx_insn *last;
5959
5960   /* If we compare constants, we shouldn't use a store-flag operation,
5961      but a constant load.  We can get there via the vanilla route that
5962      usually generates a compare-branch sequence, but will in this case
5963      fold the comparison to a constant, and thus elide the branch.  */
5964   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5965     return NULL_RTX;
5966
5967   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5968                            target_mode);
5969   if (tem)
5970     return tem;
5971
5972   /* If we reached here, we can't do this with a scc insn, however there
5973      are some comparisons that can be done in other ways.  Don't do any
5974      of these cases if branches are very cheap.  */
5975   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5976     return 0;
5977
5978   /* See what we need to return.  We can only return a 1, -1, or the
5979      sign bit.  */
5980
5981   if (normalizep == 0)
5982     {
5983       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5984         normalizep = STORE_FLAG_VALUE;
5985
5986       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5987         ;
5988       else
5989         return 0;
5990     }
5991
5992   last = get_last_insn ();
5993
5994   /* If optimizing, use different pseudo registers for each insn, instead
5995      of reusing the same pseudo.  This leads to better CSE, but slows
5996      down the compiler, since there are more pseudos.  */
5997   subtarget = (!optimize
5998                && (target_mode == mode)) ? target : NULL_RTX;
5999   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6000
6001   /* For floating-point comparisons, try the reverse comparison or try
6002      changing the "orderedness" of the comparison.  */
6003   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6004     {
6005       enum rtx_code first_code;
6006       bool and_them;
6007
6008       rcode = reverse_condition_maybe_unordered (code);
6009       if (can_compare_p (rcode, mode, ccp_store_flag)
6010           && (code == ORDERED || code == UNORDERED
6011               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6012               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6013         {
6014           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6015                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6016
6017           /* For the reverse comparison, use either an addition or a XOR.  */
6018           if (want_add
6019               && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6020                            optimize_insn_for_speed_p ()) == 0)
6021             {
6022               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6023                                        STORE_FLAG_VALUE, target_mode);
6024               if (tem)
6025                 return expand_binop (target_mode, add_optab, tem,
6026                                      gen_int_mode (normalizep, target_mode),
6027                                      target, 0, OPTAB_WIDEN);
6028             }
6029           else if (!want_add
6030                    && rtx_cost (trueval, mode, XOR, 1,
6031                                 optimize_insn_for_speed_p ()) == 0)
6032             {
6033               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6034                                        normalizep, target_mode);
6035               if (tem)
6036                 return expand_binop (target_mode, xor_optab, tem, trueval,
6037                                      target, INTVAL (trueval) >= 0,
6038                                      OPTAB_WIDEN);
6039             }
6040         }
6041
6042       delete_insns_since (last);
6043
6044       /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
6045       if (code == ORDERED || code == UNORDERED)
6046         return 0;
6047
6048       and_them = split_comparison (code, mode, &first_code, &code);
6049
6050       /* If there are no NaNs, the first comparison should always fall through.
6051          Effectively change the comparison to the other one.  */
6052       if (!HONOR_NANS (mode))
6053         {
6054           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6055           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6056                                     target_mode);
6057         }
6058
6059       if (!HAVE_conditional_move)
6060         return 0;
6061
6062       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6063          conditional move.  */
6064       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6065                                normalizep, target_mode);
6066       if (tem == 0)
6067         return 0;
6068
6069       if (and_them)
6070         tem = emit_conditional_move (target, code, op0, op1, mode,
6071                                      tem, const0_rtx, GET_MODE (tem), 0);
6072       else
6073         tem = emit_conditional_move (target, code, op0, op1, mode,
6074                                      trueval, tem, GET_MODE (tem), 0);
6075
6076       if (tem == 0)
6077         delete_insns_since (last);
6078       return tem;
6079     }
6080
6081   /* The remaining tricks only apply to integer comparisons.  */
6082
6083   scalar_int_mode int_mode;
6084   if (is_int_mode (mode, &int_mode))
6085     return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6086                                 unsignedp, normalizep, trueval);
6087
6088   return 0;
6089 }
6090
6091 /* Like emit_store_flag, but always succeeds.  */
6092
6093 rtx
6094 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6095                        machine_mode mode, int unsignedp, int normalizep)
6096 {
6097   rtx tem;
6098   rtx_code_label *label;
6099   rtx trueval, falseval;
6100
6101   /* First see if emit_store_flag can do the job.  */
6102   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6103   if (tem != 0)
6104     return tem;
6105
6106   if (!target)
6107     target = gen_reg_rtx (word_mode);
6108
6109   /* If this failed, we have to do this with set/compare/jump/set code.
6110      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
6111   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6112   if (code == NE
6113       && GET_MODE_CLASS (mode) == MODE_INT
6114       && REG_P (target)
6115       && op0 == target
6116       && op1 == const0_rtx)
6117     {
6118       label = gen_label_rtx ();
6119       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6120                                NULL_RTX, NULL, label,
6121                                profile_probability::uninitialized ());
6122       emit_move_insn (target, trueval);
6123       emit_label (label);
6124       return target;
6125     }
6126
6127   if (!REG_P (target)
6128       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6129     target = gen_reg_rtx (GET_MODE (target));
6130
6131   /* Jump in the right direction if the target cannot implement CODE
6132      but can jump on its reverse condition.  */
6133   falseval = const0_rtx;
6134   if (! can_compare_p (code, mode, ccp_jump)
6135       && (! FLOAT_MODE_P (mode)
6136           || code == ORDERED || code == UNORDERED
6137           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6138           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6139     {
6140       enum rtx_code rcode;
6141       if (FLOAT_MODE_P (mode))
6142         rcode = reverse_condition_maybe_unordered (code);
6143       else
6144         rcode = reverse_condition (code);
6145
6146       /* Canonicalize to UNORDERED for the libcall.  */
6147       if (can_compare_p (rcode, mode, ccp_jump)
6148           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6149         {
6150           falseval = trueval;
6151           trueval = const0_rtx;
6152           code = rcode;
6153         }
6154     }
6155
6156   emit_move_insn (target, trueval);
6157   label = gen_label_rtx ();
6158   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6159                            label, profile_probability::uninitialized ());
6160
6161   emit_move_insn (target, falseval);
6162   emit_label (label);
6163
6164   return target;
6165 }
6166 \f
6167 /* Perform possibly multi-word comparison and conditional jump to LABEL
6168    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
6169    now a thin wrapper around do_compare_rtx_and_jump.  */
6170
6171 static void
6172 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6173                  rtx_code_label *label)
6174 {
6175   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6176   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6177                            NULL, label, profile_probability::uninitialized ());
6178 }