gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2015 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "backend.h"
  26 #include "predict.h"
  27 #include "tree.h"
  28 #include "rtl.h"
  29 #include "df.h"
  30 #include "diagnostic-core.h"
  31 #include "alias.h"
  32 #include "fold-const.h"
  33 #include "stor-layout.h"
  34 #include "tm_p.h"
  35 #include "flags.h"
  36 #include "insn-config.h"
  37 #include "expmed.h"
  38 #include "dojump.h"
  39 #include "explow.h"
  40 #include "calls.h"
  41 #include "emit-rtl.h"
  42 #include "varasm.h"
  43 #include "stmt.h"
  44 #include "expr.h"
  45 #include "insn-codes.h"
  46 #include "optabs.h"
  47 #include "recog.h"
  48 #include "langhooks.h"
  49 #include "target.h"
  50
  51 struct target_expmed default_target_expmed;
  52 #if SWITCHABLE_TARGET
  53 struct target_expmed *this_target_expmed = &default_target_expmed;
  54 #endif
  55
  56 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  57                                    unsigned HOST_WIDE_INT,
  58                                    unsigned HOST_WIDE_INT,
  59                                    unsigned HOST_WIDE_INT,
  60                                    rtx);
  61 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
  62                                      unsigned HOST_WIDE_INT,
  63                                      rtx);
  64 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  65                                    unsigned HOST_WIDE_INT,
  66                                    unsigned HOST_WIDE_INT,
  67                                    unsigned HOST_WIDE_INT,
  68                                    rtx);
  69 static rtx extract_fixed_bit_field (machine_mode, rtx,
  70                                     unsigned HOST_WIDE_INT,
  71                                     unsigned HOST_WIDE_INT, rtx, int);
  72 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
  73                                       unsigned HOST_WIDE_INT,
  74                                       unsigned HOST_WIDE_INT, rtx, int);
  75 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
  76 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  77                                     unsigned HOST_WIDE_INT, int);
  78 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
  79 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  80 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  81
  82 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  83    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  84    The mask is truncated if necessary to the width of mode MODE.  The
  85    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
  86
  87 static inline rtx
  88 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
  89 {
  90   return immed_wide_int_const
  91     (wi::shifted_mask (bitpos, bitsize, complement,
  92                        GET_MODE_PRECISION (mode)), mode);
  93 }
  94
  95 /* Test whether a value is zero of a power of two.  */
  96 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  97   (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
  98
  99 struct init_expmed_rtl
 100 {
 101   rtx reg;
 102   rtx plus;
 103   rtx neg;
 104   rtx mult;
 105   rtx sdiv;
 106   rtx udiv;
 107   rtx sdiv_32;
 108   rtx smod_32;
 109   rtx wide_mult;
 110   rtx wide_lshr;
 111   rtx wide_trunc;
 112   rtx shift;
 113   rtx shift_mult;
 114   rtx shift_add;
 115   rtx shift_sub0;
 116   rtx shift_sub1;
 117   rtx zext;
 118   rtx trunc;
 119
 120   rtx pow2[MAX_BITS_PER_WORD];
 121   rtx cint[MAX_BITS_PER_WORD];
 122 };
 123
 124 static void
 125 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
 126                       machine_mode from_mode, bool speed)
 127 {
 128   int to_size, from_size;
 129   rtx which;
 130
 131   to_size = GET_MODE_PRECISION (to_mode);
 132   from_size = GET_MODE_PRECISION (from_mode);
 133
 134   /* Most partial integers have a precision less than the "full"
 135      integer it requires for storage.  In case one doesn't, for
 136      comparison purposes here, reduce the bit size by one in that
 137      case.  */
 138   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 139       && exact_log2 (to_size) != -1)
 140     to_size --;
 141   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 142       && exact_log2 (from_size) != -1)
 143     from_size --;
 144
 145   /* Assume cost of zero-extend and sign-extend is the same.  */
 146   which = (to_size < from_size ? all->trunc : all->zext);
 147
 148   PUT_MODE (all->reg, from_mode);
 149   set_convert_cost (to_mode, from_mode, speed,
 150                     set_src_cost (which, to_mode, speed));
 151 }
 152
 153 static void
 154 init_expmed_one_mode (struct init_expmed_rtl *all,
 155                       machine_mode mode, int speed)
 156 {
 157   int m, n, mode_bitsize;
 158   machine_mode mode_from;
 159
 160   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 161
 162   PUT_MODE (all->reg, mode);
 163   PUT_MODE (all->plus, mode);
 164   PUT_MODE (all->neg, mode);
 165   PUT_MODE (all->mult, mode);
 166   PUT_MODE (all->sdiv, mode);
 167   PUT_MODE (all->udiv, mode);
 168   PUT_MODE (all->sdiv_32, mode);
 169   PUT_MODE (all->smod_32, mode);
 170   PUT_MODE (all->wide_trunc, mode);
 171   PUT_MODE (all->shift, mode);
 172   PUT_MODE (all->shift_mult, mode);
 173   PUT_MODE (all->shift_add, mode);
 174   PUT_MODE (all->shift_sub0, mode);
 175   PUT_MODE (all->shift_sub1, mode);
 176   PUT_MODE (all->zext, mode);
 177   PUT_MODE (all->trunc, mode);
 178
 179   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
 180   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
 181   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
 182   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
 183   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
 184
 185   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
 186                                      <= 2 * add_cost (speed, mode)));
 187   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
 188                                      <= 4 * add_cost (speed, mode)));
 189
 190   set_shift_cost (speed, mode, 0, 0);
 191   {
 192     int cost = add_cost (speed, mode);
 193     set_shiftadd_cost (speed, mode, 0, cost);
 194     set_shiftsub0_cost (speed, mode, 0, cost);
 195     set_shiftsub1_cost (speed, mode, 0, cost);
 196   }
 197
 198   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 199   for (m = 1; m < n; m++)
 200     {
 201       XEXP (all->shift, 1) = all->cint[m];
 202       XEXP (all->shift_mult, 1) = all->pow2[m];
 203
 204       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
 205       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
 206                                                        speed));
 207       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
 208                                                         speed));
 209       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
 210                                                         speed));
 211     }
 212
 213   if (SCALAR_INT_MODE_P (mode))
 214     {
 215       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 216            mode_from = (machine_mode)(mode_from + 1))
 217         init_expmed_one_conv (all, mode, mode_from, speed);
 218     }
 219   if (GET_MODE_CLASS (mode) == MODE_INT)
 220     {
 221       machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
 222       if (wider_mode != VOIDmode)
 223         {
 224           PUT_MODE (all->zext, wider_mode);
 225           PUT_MODE (all->wide_mult, wider_mode);
 226           PUT_MODE (all->wide_lshr, wider_mode);
 227           XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
 228
 229           set_mul_widen_cost (speed, wider_mode,
 230                               set_src_cost (all->wide_mult, wider_mode, speed));
 231           set_mul_highpart_cost (speed, mode,
 232                                  set_src_cost (all->wide_trunc, mode, speed));
 233         }
 234     }
 235 }
 236
 237 void
 238 init_expmed (void)
 239 {
 240   struct init_expmed_rtl all;
 241   machine_mode mode = QImode;
 242   int m, speed;
 243
 244   memset (&all, 0, sizeof all);
 245   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 246     {
 247       all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 248       all.cint[m] = GEN_INT (m);
 249     }
 250
 251   /* Avoid using hard regs in ways which may be unsupported.  */
 252   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 253   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 254   all.neg = gen_rtx_NEG (mode, all.reg);
 255   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 256   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 257   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 258   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 259   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 260   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 261   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 262   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 263   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 264   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 265   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 266   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 267   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 268   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 269   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 270
 271   for (speed = 0; speed < 2; speed++)
 272     {
 273       crtl->maybe_hot_insn_p = speed;
 274       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
 275
 276       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 277            mode = (machine_mode)(mode + 1))
 278         init_expmed_one_mode (&all, mode, speed);
 279
 280       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 281         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 282              mode = (machine_mode)(mode + 1))
 283           init_expmed_one_mode (&all, mode, speed);
 284
 285       if (MIN_MODE_VECTOR_INT != VOIDmode)
 286         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 287              mode = (machine_mode)(mode + 1))
 288           init_expmed_one_mode (&all, mode, speed);
 289     }
 290
 291   if (alg_hash_used_p ())
 292     {
 293       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 294       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 295     }
 296   else
 297     set_alg_hash_used_p (true);
 298   default_rtl_profile ();
 299
 300   ggc_free (all.trunc);
 301   ggc_free (all.shift_sub1);
 302   ggc_free (all.shift_sub0);
 303   ggc_free (all.shift_add);
 304   ggc_free (all.shift_mult);
 305   ggc_free (all.shift);
 306   ggc_free (all.wide_trunc);
 307   ggc_free (all.wide_lshr);
 308   ggc_free (all.wide_mult);
 309   ggc_free (all.zext);
 310   ggc_free (all.smod_32);
 311   ggc_free (all.sdiv_32);
 312   ggc_free (all.udiv);
 313   ggc_free (all.sdiv);
 314   ggc_free (all.mult);
 315   ggc_free (all.neg);
 316   ggc_free (all.plus);
 317   ggc_free (all.reg);
 318 }
 319
 320 /* Return an rtx representing minus the value of X.
 321    MODE is the intended mode of the result,
 322    useful if X is a CONST_INT.  */
 323
 324 rtx
 325 negate_rtx (machine_mode mode, rtx x)
 326 {
 327   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 328
 329   if (result == 0)
 330     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 331
 332   return result;
 333 }
 334
 335 /* Adjust bitfield memory MEM so that it points to the first unit of mode
 336    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
 337    If MODE is BLKmode, return a reference to every byte in the bitfield.
 338    Set *NEW_BITNUM to the bit position of the field within the new memory.  */
 339
 340 static rtx
 341 narrow_bit_field_mem (rtx mem, machine_mode mode,
 342                       unsigned HOST_WIDE_INT bitsize,
 343                       unsigned HOST_WIDE_INT bitnum,
 344                       unsigned HOST_WIDE_INT *new_bitnum)
 345 {
 346   if (mode == BLKmode)
 347     {
 348       *new_bitnum = bitnum % BITS_PER_UNIT;
 349       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 350       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 351                             / BITS_PER_UNIT);
 352       return adjust_bitfield_address_size (mem, mode, offset, size);
 353     }
 354   else
 355     {
 356       unsigned int unit = GET_MODE_BITSIZE (mode);
 357       *new_bitnum = bitnum % unit;
 358       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 359       return adjust_bitfield_address (mem, mode, offset);
 360     }
 361 }
 362
 363 /* The caller wants to perform insertion or extraction PATTERN on a
 364    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 365    BITREGION_START and BITREGION_END are as for store_bit_field
 366    and FIELDMODE is the natural mode of the field.
 367
 368    Search for a mode that is compatible with the memory access
 369    restrictions and (where applicable) with a register insertion or
 370    extraction.  Return the new memory on success, storing the adjusted
 371    bit position in *NEW_BITNUM.  Return null otherwise.  */
 372
 373 static rtx
 374 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 375                               rtx op0, HOST_WIDE_INT bitsize,
 376                               HOST_WIDE_INT bitnum,
 377                               unsigned HOST_WIDE_INT bitregion_start,
 378                               unsigned HOST_WIDE_INT bitregion_end,
 379                               machine_mode fieldmode,
 380                               unsigned HOST_WIDE_INT *new_bitnum)
 381 {
 382   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 383                                 bitregion_end, MEM_ALIGN (op0),
 384                                 MEM_VOLATILE_P (op0));
 385   machine_mode best_mode;
 386   if (iter.next_mode (&best_mode))
 387     {
 388       /* We can use a memory in BEST_MODE.  See whether this is true for
 389          any wider modes.  All other things being equal, we prefer to
 390          use the widest mode possible because it tends to expose more
 391          CSE opportunities.  */
 392       if (!iter.prefer_smaller_modes ())
 393         {
 394           /* Limit the search to the mode required by the corresponding
 395              register insertion or extraction instruction, if any.  */
 396           machine_mode limit_mode = word_mode;
 397           extraction_insn insn;
 398           if (get_best_reg_extraction_insn (&insn, pattern,
 399                                             GET_MODE_BITSIZE (best_mode),
 400                                             fieldmode))
 401             limit_mode = insn.field_mode;
 402
 403           machine_mode wider_mode;
 404           while (iter.next_mode (&wider_mode)
 405                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 406             best_mode = wider_mode;
 407         }
 408       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 409                                    new_bitnum);
 410     }
 411   return NULL_RTX;
 412 }
 413
 414 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 415    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 416    offset is then BITNUM / BITS_PER_UNIT.  */
 417
 418 static bool
 419 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
 420                      unsigned HOST_WIDE_INT bitsize,
 421                      machine_mode struct_mode)
 422 {
 423   if (BYTES_BIG_ENDIAN)
 424     return (bitnum % BITS_PER_UNIT == 0
 425             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
 426                 || (bitnum + bitsize) % BITS_PER_WORD == 0));
 427   else
 428     return bitnum % BITS_PER_WORD == 0;
 429 }
 430
 431 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 432    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 433    Return false if the access would touch memory outside the range
 434    BITREGION_START to BITREGION_END for conformance to the C++ memory
 435    model.  */
 436
 437 static bool
 438 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 439                             unsigned HOST_WIDE_INT bitnum,
 440                             machine_mode fieldmode,
 441                             unsigned HOST_WIDE_INT bitregion_start,
 442                             unsigned HOST_WIDE_INT bitregion_end)
 443 {
 444   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 445
 446   /* -fstrict-volatile-bitfields must be enabled and we must have a
 447      volatile MEM.  */
 448   if (!MEM_P (op0)
 449       || !MEM_VOLATILE_P (op0)
 450       || flag_strict_volatile_bitfields <= 0)
 451     return false;
 452
 453   /* Non-integral modes likely only happen with packed structures.
 454      Punt.  */
 455   if (!SCALAR_INT_MODE_P (fieldmode))
 456     return false;
 457
 458   /* The bit size must not be larger than the field mode, and
 459      the field mode must not be larger than a word.  */
 460   if (bitsize > modesize || modesize > BITS_PER_WORD)
 461     return false;
 462
 463   /* Check for cases of unaligned fields that must be split.  */
 464   if (bitnum % modesize + bitsize > modesize)
 465     return false;
 466
 467   /* The memory must be sufficiently aligned for a MODESIZE access.
 468      This condition guarantees, that the memory access will not
 469      touch anything after the end of the structure.  */
 470   if (MEM_ALIGN (op0) < modesize)
 471     return false;
 472
 473   /* Check for cases where the C++ memory model applies.  */
 474   if (bitregion_end != 0
 475       && (bitnum - bitnum % modesize < bitregion_start
 476           || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
 477     return false;
 478
 479   return true;
 480 }
 481
 482 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 483    bit number BITNUM can be treated as a simple value of mode MODE.  */
 484
 485 static bool
 486 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 487                        unsigned HOST_WIDE_INT bitnum, machine_mode mode)
 488 {
 489   return (MEM_P (op0)
 490           && bitnum % BITS_PER_UNIT == 0
 491           && bitsize == GET_MODE_BITSIZE (mode)
 492           && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
 493               || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
 494                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 495 }
 496 \f
 497 /* Try to use instruction INSV to store VALUE into a field of OP0.
 498    BITSIZE and BITNUM are as for store_bit_field.  */
 499
 500 static bool
 501 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 502                             unsigned HOST_WIDE_INT bitsize,
 503                             unsigned HOST_WIDE_INT bitnum,
 504                             rtx value)
 505 {
 506   struct expand_operand ops[4];
 507   rtx value1;
 508   rtx xop0 = op0;
 509   rtx_insn *last = get_last_insn ();
 510   bool copy_back = false;
 511
 512   machine_mode op_mode = insv->field_mode;
 513   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 514   if (bitsize == 0 || bitsize > unit)
 515     return false;
 516
 517   if (MEM_P (xop0))
 518     /* Get a reference to the first byte of the field.  */
 519     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 520                                  &bitnum);
 521   else
 522     {
 523       /* Convert from counting within OP0 to counting in OP_MODE.  */
 524       if (BYTES_BIG_ENDIAN)
 525         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 526
 527       /* If xop0 is a register, we need it in OP_MODE
 528          to make it acceptable to the format of insv.  */
 529       if (GET_CODE (xop0) == SUBREG)
 530         /* We can't just change the mode, because this might clobber op0,
 531            and we will need the original value of op0 if insv fails.  */
 532         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 533       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 534         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 535     }
 536
 537   /* If the destination is a paradoxical subreg such that we need a
 538      truncate to the inner mode, perform the insertion on a temporary and
 539      truncate the result to the original destination.  Note that we can't
 540      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 541      X) 0)) is (reg:N X).  */
 542   if (GET_CODE (xop0) == SUBREG
 543       && REG_P (SUBREG_REG (xop0))
 544       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 545                                          op_mode))
 546     {
 547       rtx tem = gen_reg_rtx (op_mode);
 548       emit_move_insn (tem, xop0);
 549       xop0 = tem;
 550       copy_back = true;
 551     }
 552
 553   /* There are similar overflow check at the start of store_bit_field_1,
 554      but that only check the situation where the field lies completely
 555      outside the register, while there do have situation where the field
 556      lies partialy in the register, we need to adjust bitsize for this
 557      partial overflow situation.  Without this fix, pr48335-2.c on big-endian
 558      will broken on those arch support bit insert instruction, like arm, aarch64
 559      etc.  */
 560   if (bitsize + bitnum > unit && bitnum < unit)
 561     {
 562       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
 563                "destination object, data truncated into %wu-bit",
 564                bitsize, unit - bitnum);
 565       bitsize = unit - bitnum;
 566     }
 567
 568   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 569      "backwards" from the size of the unit we are inserting into.
 570      Otherwise, we count bits from the most significant on a
 571      BYTES/BITS_BIG_ENDIAN machine.  */
 572
 573   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 574     bitnum = unit - bitsize - bitnum;
 575
 576   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 577   value1 = value;
 578   if (GET_MODE (value) != op_mode)
 579     {
 580       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 581         {
 582           /* Optimization: Don't bother really extending VALUE
 583              if it has all the bits we will actually use.  However,
 584              if we must narrow it, be sure we do it correctly.  */
 585
 586           if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 587             {
 588               rtx tmp;
 589
 590               tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 591               if (! tmp)
 592                 tmp = simplify_gen_subreg (op_mode,
 593                                            force_reg (GET_MODE (value),
 594                                                       value1),
 595                                            GET_MODE (value), 0);
 596               value1 = tmp;
 597             }
 598           else
 599             value1 = gen_lowpart (op_mode, value1);
 600         }
 601       else if (CONST_INT_P (value))
 602         value1 = gen_int_mode (INTVAL (value), op_mode);
 603       else
 604         /* Parse phase is supposed to make VALUE's data type
 605            match that of the component reference, which is a type
 606            at least as wide as the field; so VALUE should have
 607            a mode that corresponds to that type.  */
 608         gcc_assert (CONSTANT_P (value));
 609     }
 610
 611   create_fixed_operand (&ops[0], xop0);
 612   create_integer_operand (&ops[1], bitsize);
 613   create_integer_operand (&ops[2], bitnum);
 614   create_input_operand (&ops[3], value1, op_mode);
 615   if (maybe_expand_insn (insv->icode, 4, ops))
 616     {
 617       if (copy_back)
 618         convert_move (op0, xop0, true);
 619       return true;
 620     }
 621   delete_insns_since (last);
 622   return false;
 623 }
 624
 625 /* A subroutine of store_bit_field, with the same arguments.  Return true
 626    if the operation could be implemented.
 627
 628    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 629    no other way of implementing the operation.  If FALLBACK_P is false,
 630    return false instead.  */
 631
 632 static bool
 633 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 634                    unsigned HOST_WIDE_INT bitnum,
 635                    unsigned HOST_WIDE_INT bitregion_start,
 636                    unsigned HOST_WIDE_INT bitregion_end,
 637                    machine_mode fieldmode,
 638                    rtx value, bool fallback_p)
 639 {
 640   rtx op0 = str_rtx;
 641   rtx orig_value;
 642
 643   while (GET_CODE (op0) == SUBREG)
 644     {
 645       /* The following line once was done only if WORDS_BIG_ENDIAN,
 646          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 647          meaningful at a much higher level; when structures are copied
 648          between memory and regs, the higher-numbered regs
 649          always get higher addresses.  */
 650       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 651       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 652       int byte_offset = 0;
 653
 654       /* Paradoxical subregs need special handling on big endian machines.  */
 655       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 656         {
 657           int difference = inner_mode_size - outer_mode_size;
 658
 659           if (WORDS_BIG_ENDIAN)
 660             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 661           if (BYTES_BIG_ENDIAN)
 662             byte_offset += difference % UNITS_PER_WORD;
 663         }
 664       else
 665         byte_offset = SUBREG_BYTE (op0);
 666
 667       bitnum += byte_offset * BITS_PER_UNIT;
 668       op0 = SUBREG_REG (op0);
 669     }
 670
 671   /* No action is needed if the target is a register and if the field
 672      lies completely outside that register.  This can occur if the source
 673      code contains an out-of-bounds access to a small array.  */
 674   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 675     return true;
 676
 677   /* Use vec_set patterns for inserting parts of vectors whenever
 678      available.  */
 679   if (VECTOR_MODE_P (GET_MODE (op0))
 680       && !MEM_P (op0)
 681       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 682       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 683       && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
 684       && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
 685     {
 686       struct expand_operand ops[3];
 687       machine_mode outermode = GET_MODE (op0);
 688       machine_mode innermode = GET_MODE_INNER (outermode);
 689       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 690       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 691
 692       create_fixed_operand (&ops[0], op0);
 693       create_input_operand (&ops[1], value, innermode);
 694       create_integer_operand (&ops[2], pos);
 695       if (maybe_expand_insn (icode, 3, ops))
 696         return true;
 697     }
 698
 699   /* If the target is a register, overwriting the entire object, or storing
 700      a full-word or multi-word field can be done with just a SUBREG.  */
 701   if (!MEM_P (op0)
 702       && bitsize == GET_MODE_BITSIZE (fieldmode)
 703       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 704           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 705     {
 706       /* Use the subreg machinery either to narrow OP0 to the required
 707          words or to cope with mode punning between equal-sized modes.
 708          In the latter case, use subreg on the rhs side, not lhs.  */
 709       rtx sub;
 710
 711       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 712         {
 713           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 714           if (sub)
 715             {
 716               emit_move_insn (op0, sub);
 717               return true;
 718             }
 719         }
 720       else
 721         {
 722           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 723                                      bitnum / BITS_PER_UNIT);
 724           if (sub)
 725             {
 726               emit_move_insn (sub, value);
 727               return true;
 728             }
 729         }
 730     }
 731
 732   /* If the target is memory, storing any naturally aligned field can be
 733      done with a simple store.  For targets that support fast unaligned
 734      memory, any naturally sized, unit aligned field can be done directly.  */
 735   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
 736     {
 737       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 738       emit_move_insn (op0, value);
 739       return true;
 740     }
 741
 742   /* Make sure we are playing with integral modes.  Pun with subregs
 743      if we aren't.  This must come after the entire register case above,
 744      since that case is valid for any mode.  The following cases are only
 745      valid for integral modes.  */
 746   {
 747     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 748     if (imode != GET_MODE (op0))
 749       {
 750         if (MEM_P (op0))
 751           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
 752         else
 753           {
 754             gcc_assert (imode != BLKmode);
 755             op0 = gen_lowpart (imode, op0);
 756           }
 757       }
 758   }
 759
 760   /* We allow move between structures of same size but different mode.
 761      If source is in memory and the mode differs, simply change the memory.  */
 762   if (GET_MODE (value) == BLKmode && GET_MODE (op0) != BLKmode)
 763     {
 764       gcc_assert (MEM_P (value));
 765       value = adjust_address_nv (value, GET_MODE (op0), 0);
 766     }
 767
 768   /* Storing an lsb-aligned field in a register
 769      can be done with a movstrict instruction.  */
 770
 771   if (!MEM_P (op0)
 772       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 773       && bitsize == GET_MODE_BITSIZE (fieldmode)
 774       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 775     {
 776       struct expand_operand ops[2];
 777       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 778       rtx arg0 = op0;
 779       unsigned HOST_WIDE_INT subreg_off;
 780
 781       if (GET_CODE (arg0) == SUBREG)
 782         {
 783           /* Else we've got some float mode source being extracted into
 784              a different float mode destination -- this combination of
 785              subregs results in Severe Tire Damage.  */
 786           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 787                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 788                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 789           arg0 = SUBREG_REG (arg0);
 790         }
 791
 792       subreg_off = bitnum / BITS_PER_UNIT;
 793       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 794         {
 795           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 796
 797           create_fixed_operand (&ops[0], arg0);
 798           /* Shrink the source operand to FIELDMODE.  */
 799           create_convert_operand_to (&ops[1], value, fieldmode, false);
 800           if (maybe_expand_insn (icode, 2, ops))
 801             return true;
 802         }
 803     }
 804
 805   /* Handle fields bigger than a word.  */
 806
 807   if (bitsize > BITS_PER_WORD)
 808     {
 809       /* Here we transfer the words of the field
 810          in the order least significant first.
 811          This is because the most significant word is the one which may
 812          be less than full.
 813          However, only do that if the value is not BLKmode.  */
 814
 815       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 816       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 817       unsigned int i;
 818       rtx_insn *last;
 819
 820       /* This is the mode we must force value to, so that there will be enough
 821          subwords to extract.  Note that fieldmode will often (always?) be
 822          VOIDmode, because that is what store_field uses to indicate that this
 823          is a bit field, but passing VOIDmode to operand_subword_force
 824          is not allowed.  */
 825       fieldmode = GET_MODE (value);
 826       if (fieldmode == VOIDmode)
 827         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 828
 829       last = get_last_insn ();
 830       for (i = 0; i < nwords; i++)
 831         {
 832           /* If I is 0, use the low-order word in both field and target;
 833              if I is 1, use the next to lowest word; and so on.  */
 834           unsigned int wordnum = (backwards
 835                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 836                                   - i - 1
 837                                   : i);
 838           unsigned int bit_offset = (backwards
 839                                      ? MAX ((int) bitsize - ((int) i + 1)
 840                                             * BITS_PER_WORD,
 841                                             0)
 842                                      : (int) i * BITS_PER_WORD);
 843           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 844           unsigned HOST_WIDE_INT new_bitsize =
 845             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 846
 847           /* If the remaining chunk doesn't have full wordsize we have
 848              to make sure that for big endian machines the higher order
 849              bits are used.  */
 850           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 851             value_word = simplify_expand_binop (word_mode, lshr_optab,
 852                                                 value_word,
 853                                                 GEN_INT (BITS_PER_WORD
 854                                                          - new_bitsize),
 855                                                 NULL_RTX, true,
 856                                                 OPTAB_LIB_WIDEN);
 857
 858           if (!store_bit_field_1 (op0, new_bitsize,
 859                                   bitnum + bit_offset,
 860                                   bitregion_start, bitregion_end,
 861                                   word_mode,
 862                                   value_word, fallback_p))
 863             {
 864               delete_insns_since (last);
 865               return false;
 866             }
 867         }
 868       return true;
 869     }
 870
 871   /* If VALUE has a floating-point or complex mode, access it as an
 872      integer of the corresponding size.  This can occur on a machine
 873      with 64 bit registers that uses SFmode for float.  It can also
 874      occur for unaligned float or complex fields.  */
 875   orig_value = value;
 876   if (GET_MODE (value) != VOIDmode
 877       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 878       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 879     {
 880       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 881       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 882     }
 883
 884   /* If OP0 is a multi-word register, narrow it to the affected word.
 885      If the region spans two words, defer to store_split_bit_field.  */
 886   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 887     {
 888       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 889                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 890       gcc_assert (op0);
 891       bitnum %= BITS_PER_WORD;
 892       if (bitnum + bitsize > BITS_PER_WORD)
 893         {
 894           if (!fallback_p)
 895             return false;
 896
 897           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 898                                  bitregion_end, value);
 899           return true;
 900         }
 901     }
 902
 903   /* From here on we can assume that the field to be stored in fits
 904      within a word.  If the destination is a register, it too fits
 905      in a word.  */
 906
 907   extraction_insn insv;
 908   if (!MEM_P (op0)
 909       && get_best_reg_extraction_insn (&insv, EP_insv,
 910                                        GET_MODE_BITSIZE (GET_MODE (op0)),
 911                                        fieldmode)
 912       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 913     return true;
 914
 915   /* If OP0 is a memory, try copying it to a register and seeing if a
 916      cheap register alternative is available.  */
 917   if (MEM_P (op0))
 918     {
 919       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
 920                                         fieldmode)
 921           && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 922         return true;
 923
 924       rtx_insn *last = get_last_insn ();
 925
 926       /* Try loading part of OP0 into a register, inserting the bitfield
 927          into that, and then copying the result back to OP0.  */
 928       unsigned HOST_WIDE_INT bitpos;
 929       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
 930                                                bitregion_start, bitregion_end,
 931                                                fieldmode, &bitpos);
 932       if (xop0)
 933         {
 934           rtx tempreg = copy_to_reg (xop0);
 935           if (store_bit_field_1 (tempreg, bitsize, bitpos,
 936                                  bitregion_start, bitregion_end,
 937                                  fieldmode, orig_value, false))
 938             {
 939               emit_move_insn (xop0, tempreg);
 940               return true;
 941             }
 942           delete_insns_since (last);
 943         }
 944     }
 945
 946   if (!fallback_p)
 947     return false;
 948
 949   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
 950                          bitregion_end, value);
 951   return true;
 952 }
 953
 954 /* Generate code to store value from rtx VALUE
 955    into a bit-field within structure STR_RTX
 956    containing BITSIZE bits starting at bit BITNUM.
 957
 958    BITREGION_START is bitpos of the first bitfield in this region.
 959    BITREGION_END is the bitpos of the ending bitfield in this region.
 960    These two fields are 0, if the C++ memory model does not apply,
 961    or we are not interested in keeping track of bitfield regions.
 962
 963    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.

        Nothing is returned.  When strict_volatile_bitfield_p accepts the
        access, the field is written here: with a plain move if BITSIZE
        equals the size of FIELDMODE, otherwise by copying the narrowed
        memory to a register, inserting the field there and moving the
        register back.  In every other case the work is handed to
        store_bit_field_1 with fallback enabled, after a MEM STR_RTX has
        been rebased to the start of the bit region when BITREGION_START
        is nonzero.  A false return from store_bit_field_1 is treated as
        unreachable.  */
 964
 965 void
 966 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 967                  unsigned HOST_WIDE_INT bitnum,
 968                  unsigned HOST_WIDE_INT bitregion_start,
 969                  unsigned HOST_WIDE_INT bitregion_end,
 970                  machine_mode fieldmode,
 971                  rtx value)
 972 {
 973   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
 974   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
 975                                   bitregion_start, bitregion_end))
 976     {
 977       /* Storing of a full word can be done with a simple store.
 978          We know here that the field can be accessed with one single
 979          instruction.  For targets that support unaligned memory,
 980          an unaligned access may be necessary.  */
 981       if (bitsize == GET_MODE_BITSIZE (fieldmode))
 982         {
 983           str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
 984                                              bitnum / BITS_PER_UNIT);
               /* The offset computed above drops BITNUM % BITS_PER_UNIT,
                  so that remainder has to be zero.  */
 985           gcc_assert (bitnum % BITS_PER_UNIT == 0);
 986           emit_move_insn (str_rtx, value);
 987         }
 988       else
 989         {
 990           rtx temp;
 991
               /* narrow_bit_field_mem receives &BITNUM, so BITNUM may be
                  updated here; the assertion checks that the field then
                  lies entirely within FIELDMODE.  */
 992           str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
 993                                           &bitnum);
 994           gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
               /* Insert the field into a register copy, passing 0 for both
                  bit region bounds and allowing fallback, then write the
                  whole temporary back with one move.  */
 995           temp = copy_to_reg (str_rtx);
 996           if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
 997                                   fieldmode, value, true))
 998             gcc_unreachable ();
 999
1000           emit_move_insn (str_rtx, temp);
1001         }
1002
1003       return;
1004     }
1005
1006   /* Under the C++0x memory model, we must not touch bits outside the
1007      bit region.  Adjust the address to start at the beginning of the
1008      bit region.  */
1009   if (MEM_P (str_rtx) && bitregion_start > 0)
1010     {
1011       machine_mode bestmode;
1012       HOST_WIDE_INT offset, size;
1013
1014       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1015
           /* OFFSET is the region start in units.  BITNUM and BITREGION_END
              become relative to the region start, which is then reset to 0.
              SIZE is the number of units, rounded up, from the region start
              through the last bit of the field.  */
1016       offset = bitregion_start / BITS_PER_UNIT;
1017       bitnum -= bitregion_start;
1018       size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1019       bitregion_end -= bitregion_start;
1020       bitregion_start = 0;
1021       bestmode = get_best_mode (bitsize, bitnum,
1022                                 bitregion_start, bitregion_end,
1023                                 MEM_ALIGN (str_rtx), VOIDmode,
1024                                 MEM_VOLATILE_P (str_rtx));
1025       str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1026     }
1027
1028   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1029                           bitregion_start, bitregion_end,
1030                           fieldmode, value, true))
1031     gcc_unreachable ();
1032 }
1033 \f
1034 /* Use shifts and boolean operations to store VALUE into a bit field of
1035    width BITSIZE in OP0, starting at bit BITNUM.

        BITREGION_START and BITREGION_END are forwarded to get_best_mode
        and, when needed, to store_split_bit_field.

        If OP0 is a MEM, an access mode is chosen with get_best_mode and
        OP0 is narrowed to it before the store; if get_best_mode yields
        VOIDmode the whole store is delegated to store_split_bit_field.
        A non-MEM OP0 is passed to store_fixed_bit_field_1 unchanged.  */
1036
1037 static void
1038 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1039                        unsigned HOST_WIDE_INT bitnum,
1040                        unsigned HOST_WIDE_INT bitregion_start,
1041                        unsigned HOST_WIDE_INT bitregion_end,
1042                        rtx value)
1043 {
1044   /* There is a case not handled here:
1045      a structure with a known alignment of just a halfword
1046      and a field split across two aligned halfwords within the structure.
1047      Or likewise a structure with a known alignment of just a byte
1048      and a field split across two bytes.
1049      Such cases are not supposed to be able to occur.  */
1050
1051   if (MEM_P (op0))
1052     {
1053       machine_mode mode = GET_MODE (op0);
           /* Hand get_best_mode OP0's own mode, except that a mode of zero
              size or one wider than word_mode is replaced by word_mode.  */
1054       if (GET_MODE_BITSIZE (mode) == 0
1055           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1056         mode = word_mode;
1057       mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1058                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1059
1060       if (mode == VOIDmode)
1061         {
1062           /* The only way this should occur is if the field spans word
1063              boundaries.  */
1064           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1065                                  bitregion_end, value);
1066           return;
1067         }
1068
           /* narrow_bit_field_mem receives &BITNUM, so BITNUM may be
              updated to match the narrowed reference.  */
1069       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1070     }
1071
1072   store_fixed_bit_field_1 (op0, bitsize, bitnum, value);
1073 }
1074
1075 /* Helper function for store_fixed_bit_field, stores
1076    the bit field always using the MODE of OP0.

        OP0 is the destination; its mode must be a scalar integer mode
        (asserted below).  BITSIZE is the field width, BITNUM its position
        and VALUE the value to store.

        VALUE is shifted into position, the field's bits are cleared in a
        register copy of OP0, VALUE is IORed in, and the result is moved
        back to OP0.  The clearing step is skipped for a constant VALUE
        whose BITSIZE bits are all ones, and the IOR step for one whose
        BITSIZE bits are all zero.  */
1077
1078 static void
1079 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1080                          unsigned HOST_WIDE_INT bitnum,
1081                          rtx value)
1082 {
1083   machine_mode mode;
1084   rtx temp;
       /* Set only for a CONST_INT VALUE: ALL_ZERO when its low BITSIZE bits
          are all zero, ALL_ONE when they are all one.  */
1085   int all_zero = 0;
1086   int all_one = 0;
1087
1088   mode = GET_MODE (op0);
1089   gcc_assert (SCALAR_INT_MODE_P (mode));
1090
1091   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1092      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1093
1094   if (BYTES_BIG_ENDIAN)
1095     /* BITNUM is the distance between our msb
1096        and that of the containing datum.
1097        Convert it to the distance from the lsb.  */
1098     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1099
1100   /* Now BITNUM is always the distance between our lsb
1101      and that of OP0.  */
1102
1103   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1104      we must first convert its mode to MODE.  */
1105
1106   if (CONST_INT_P (value))
1107     {
1108       unsigned HOST_WIDE_INT v = UINTVAL (value);
1109
           /* Truncate the constant to BITSIZE bits.  The guard keeps the
              shift count below HOST_BITS_PER_WIDE_INT.  */
1110       if (bitsize < HOST_BITS_PER_WIDE_INT)
1111         v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;
1112
1113       if (v == 0)
1114         all_zero = 1;
1115       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1116                 && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
1117                || (bitsize == HOST_BITS_PER_WIDE_INT
1118                    && v == (unsigned HOST_WIDE_INT) -1))
1119         all_one = 1;
1120
1121       value = lshift_value (mode, v, bitnum);
1122     }
1123   else
1124     {
           /* Mask VALUE to BITSIZE bits only when its mode is not exactly
              BITSIZE bits wide and the field does not end at the most
              significant bit of MODE.  This is computed before VALUE is
              converted, since the conversion changes its mode.  */
1125       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1126                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1127
1128       if (GET_MODE (value) != mode)
1129         value = convert_to_mode (mode, value, 1);
1130
1131       if (must_and)
1132         value = expand_binop (mode, and_optab, value,
1133                               mask_rtx (mode, 0, bitsize, 0),
1134                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1135       if (bitnum > 0)
1136         value = expand_shift (LSHIFT_EXPR, mode, value,
1137                               bitnum, NULL_RTX, 1);
1138     }
1139
1140   /* Now clear the chosen bits in OP0,
1141      except that if VALUE is -1 we need not bother.  */
1142   /* We keep the intermediates in registers to allow CSE to combine
1143      consecutive bitfield assignments.  */
1144
1145   temp = force_reg (mode, op0);
1146
1147   if (! all_one)
1148     {
1149       temp = expand_binop (mode, and_optab, temp,
1150                            mask_rtx (mode, bitnum, bitsize, 1),
1151                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1152       temp = force_reg (mode, temp);
1153     }
1154
1155   /* Now logical-or VALUE into OP0, unless it is zero.  */
1156
1157   if (! all_zero)
1158     {
1159       temp = expand_binop (mode, ior_optab, temp, value,
1160                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1161       temp = force_reg (mode, temp);
1162     }
1163
       /* Write the result back unless TEMP is OP0 itself.  The move
          targets a copy_rtx of OP0 rather than OP0 directly.  */
1164   if (op0 != temp)
1165     {
1166       op0 = copy_rtx (op0);
1167       emit_move_insn (op0, temp);
1168     }
1169 }
1170 \f
1171 /* Store a bit field that is split across multiple accessible memory objects.
1172
1173    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1174    BITSIZE is the field width; BITPOS the position of its first bit
1175    (within the word).
1176    VALUE is the value to store.
1177
1178    This does not yet handle fields wider than BITS_PER_WORD.

        BITREGION_START and BITREGION_END are forwarded to
        store_fixed_bit_field; a nonzero BITREGION_END additionally makes
        the loop shrink its access unit near the end of the region when
        OP0 is not a REG or a SUBREG of a REG.

        The field is stored piecewise: each loop iteration takes the next
        THISSIZE bits of VALUE and stores them with store_fixed_bit_field,
        never crossing a UNIT boundary.  */
1179
1180 static void
1181 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1182                        unsigned HOST_WIDE_INT bitpos,
1183                        unsigned HOST_WIDE_INT bitregion_start,
1184                        unsigned HOST_WIDE_INT bitregion_end,
1185                        rtx value)
1186 {
       /* UNIT is the size in bits of the chunks the destination is divided
          into; BITSDONE counts the bits of the field stored so far.  */
1187   unsigned int unit;
1188   unsigned int bitsdone = 0;
1189
1190   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1191      much at a time.  */
1192   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1193     unit = BITS_PER_WORD;
1194   else
1195     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1196
1197   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1198      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1199      again, and we will mutually recurse forever.  */
1200   if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1201     unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1202
1203   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1204      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1205      that VALUE might be a floating-point constant.  */
1206   if (CONSTANT_P (value) && !CONST_INT_P (value))
1207     {
1208       rtx word = gen_lowpart_common (word_mode, value);
1209
1210       if (word && (value != word))
1211         value = word;
1212       else
1213         value = gen_lowpart_common (word_mode,
1214                                     force_reg (GET_MODE (value) != VOIDmode
1215                                                ? GET_MODE (value)
1216                                                : word_mode, value));
1217     }
1218
1219   while (bitsdone < bitsize)
1220     {
1221       unsigned HOST_WIDE_INT thissize;
1222       rtx part, word;
1223       unsigned HOST_WIDE_INT thispos;
1224       unsigned HOST_WIDE_INT offset;
1225
           /* OFFSET is the index, in UNITs, of the unit holding the next
              bit to store; THISPOS is that bit's position inside it.  */
1226       offset = (bitpos + bitsdone) / unit;
1227       thispos = (bitpos + bitsdone) % unit;
1228
1229       /* When region of bytes we can touch is restricted, decrease
1230          UNIT close to the end of the region as needed.  If op0 is a REG
1231          or SUBREG of REG, don't do this, as there can't be data races
1232          on a register and we can expand shorter code in some cases.  */
1233       if (bitregion_end
1234           && unit > BITS_PER_UNIT
1235           && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1236           && !REG_P (op0)
1237           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1238         {
               /* Redo this iteration with the halved UNIT; BITSDONE is
                  unchanged, so OFFSET and THISPOS are recomputed.  */
1239           unit = unit / 2;
1240           continue;
1241         }
1242
1243       /* THISSIZE must not overrun a word boundary.  Otherwise,
1244          store_fixed_bit_field will call us again, and we will mutually
1245          recurse forever.  */
1246       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1247       thissize = MIN (thissize, unit - thispos);
1248
1249       if (BYTES_BIG_ENDIAN)
1250         {
1251           /* Fetch successively less significant portions.  */
1252           if (CONST_INT_P (value))
1253             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1254                              >> (bitsize - bitsdone - thissize))
1255                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1256           else
1257             {
1258               int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1259               /* The args are chosen so that the last part includes the
1260                  lsb.  Give extract_bit_field the value it needs (with
1261                  endianness compensation) to fetch the piece we want.  */
1262               part = extract_fixed_bit_field (word_mode, value, thissize,
1263                                               total_bits - bitsize + bitsdone,
1264                                               NULL_RTX, 1);
1265             }
1266         }
1267       else
1268         {
1269           /* Fetch successively more significant portions.  */
1270           if (CONST_INT_P (value))
1271             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1272                              >> bitsdone)
1273                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1274           else
1275             part = extract_fixed_bit_field (word_mode, value, thissize,
1276                                             bitsdone, NULL_RTX, 1);
1277         }
1278
1279       /* If OP0 is a register, then handle OFFSET here.
1280
1281          When handling multiword bitfields, extract_bit_field may pass
1282          down a word_mode SUBREG of a larger REG for a bitfield that actually
1283          crosses a word boundary.  Thus, for a SUBREG, we must find
1284          the current word starting from the base register.  */
1285       if (GET_CODE (op0) == SUBREG)
1286         {
               /* WORD_OFFSET counts words from the start of SUBREG_REG (op0):
                  the SUBREG's own word offset plus the whole words covered
                  by OFFSET units.  */
1287           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1288                             + (offset * unit / BITS_PER_WORD);
1289           machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
               /* An inner register narrower than a word has only word 0;
                  any other word is marked with const0_rtx and skipped
                  below.  */
1290           if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1291             word = word_offset ? const0_rtx : op0;
1292           else
1293             word = operand_subword_force (SUBREG_REG (op0), word_offset,
1294                                           GET_MODE (SUBREG_REG (op0)));
               /* Reduce OFFSET to the unit index within the selected word
                  (the mask form presumably relies on BITS_PER_WORD / UNIT
                  being a power of two).  */
1295           offset &= BITS_PER_WORD / unit - 1;
1296         }
1297       else if (REG_P (op0))
1298         {
1299           machine_mode op0_mode = GET_MODE (op0);
               /* Same treatment as the SUBREG case, with OP0 itself as the
                  base register.  */
1300           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1301             word = offset ? const0_rtx : op0;
1302           else
1303             word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1304                                           GET_MODE (op0));
1305           offset &= BITS_PER_WORD / unit - 1;
1306         }
1307       else
1308         word = op0;
1309
1310       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1311          it is just an out-of-bounds access.  Ignore it.  */
1312       if (word != const0_rtx)
1313         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1314                                bitregion_start, bitregion_end, part);
           /* Advance even when the store was skipped, so the loop
              terminates.  */
1315       bitsdone += thissize;
1316     }
1317 }
1318 \f
1319 /* A subroutine of extract_bit_field_1 that converts return value X
1320    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1321    to extract_bit_field.

        X is returned unchanged if it already has mode TMODE or MODE.
        Otherwise the result is X converted to TMODE; UNSIGNEDP is passed
        to convert_to_mode for every conversion made here.  */
1322
1323 static rtx
1324 convert_extracted_bit_field (rtx x, machine_mode mode,
1325                              machine_mode tmode, bool unsignedp)
1326 {
1327   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1328     return x;
1329
1330   /* If TMODE is not a scalar integer mode, first convert X to the
1331      integer mode with the same bit size as TMODE, force that into a
1332      register, and then return its lowpart in TMODE.  */
1333   if (!SCALAR_INT_MODE_P (tmode))
1334     {
1335       machine_mode smode;
1336
1337       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1338       x = convert_to_mode (smode, x, unsignedp);
1339       x = force_reg (smode, x);
1340       return gen_lowpart (tmode, x);
1341     }
1342
1343   return convert_to_mode (tmode, x, unsignedp);
1344 }
1345
1346 /* Try to use an ext(z)v pattern to extract a field from OP0.
1347    Return the extracted value on success, otherwise return null.
1348    EXT_MODE is the mode of the extraction and the other arguments
1349    are as for extract_bit_field.

        EXTV supplies the insn code (icode), the extraction mode
        (field_mode) and, for a MEM OP0, the mode the memory is narrowed
        to (struct_mode).

        NULL_RTX is returned when BITSIZE is zero or exceeds the bit size
        of the extraction mode, when OP0 is a SUBREG whose mode is not the
        extraction mode, or when maybe_expand_insn fails.  */
1350
1351 static rtx
1352 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1353                               unsigned HOST_WIDE_INT bitsize,
1354                               unsigned HOST_WIDE_INT bitnum,
1355                               int unsignedp, rtx target,
1356                               machine_mode mode, machine_mode tmode)
1357 {
1358   struct expand_operand ops[4];
       /* SPEC_TARGET is the target as seen by the caller (or the fresh
          TMODE register created below).  SPEC_TARGET_SUBREG, when set, is
          the EXT_MODE lowpart of it that the insn is asked to write.  */
1359   rtx spec_target = target;
1360   rtx spec_target_subreg = 0;
1361   machine_mode ext_mode = extv->field_mode;
1362   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1363
1364   if (bitsize == 0 || unit < bitsize)
1365     return NULL_RTX;
1366
1367   if (MEM_P (op0))
1368     /* Get a reference to the first byte of the field.  */
1369     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1370                                 &bitnum);
1371   else
1372     {
1373       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1374       if (BYTES_BIG_ENDIAN)
1375         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1376
1377       /* If op0 is a register, we need it in EXT_MODE to make it
1378          acceptable to the format of ext(z)v.  */
1379       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1380         return NULL_RTX;
1381       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1382         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1383     }
1384
1385   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1386      "backwards" from the size of the unit we are extracting from.
1387      Otherwise, we count bits from the most significant on a
1388      BYTES/BITS_BIG_ENDIAN machine.  */
1389
1390   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1391     bitnum = unit - bitsize - bitnum;
1392
1393   if (target == 0)
1394     target = spec_target = gen_reg_rtx (tmode);
1395
1396   if (GET_MODE (target) != ext_mode)
1397     {
1398       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1399          between the mode of the extraction (word_mode) and the target
1400          mode.  Instead, create a temporary and use convert_move to set
1401          the target.  */
1402       if (REG_P (target)
1403           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1404         {
1405           target = gen_lowpart (ext_mode, target);
               /* Remember the lowpart only when EXT_MODE has more precision
                  than the original target's mode, so a result written
                  there can be returned as SPEC_TARGET below.  */
1406           if (GET_MODE_PRECISION (ext_mode)
1407               > GET_MODE_PRECISION (GET_MODE (spec_target)))
1408             spec_target_subreg = target;
1409         }
1410       else
1411         target = gen_reg_rtx (ext_mode);
1412     }
1413
       /* Operands: 0 is the output, 1 the source, 2 the field width and
          3 the (adjusted) bit position.  */
1414   create_output_operand (&ops[0], target, ext_mode);
1415   create_fixed_operand (&ops[1], op0);
1416   create_integer_operand (&ops[2], bitsize);
1417   create_integer_operand (&ops[3], bitnum);
1418   if (maybe_expand_insn (extv->icode, 4, ops))
1419     {
           /* Use the output operand's final value.  It is returned as is
              when it is SPEC_TARGET, mapped back to SPEC_TARGET when it is
              the recorded lowpart, and otherwise converted.  */
1420       target = ops[0].value;
1421       if (target == spec_target)
1422         return target;
1423       if (target == spec_target_subreg)
1424         return spec_target;
1425       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1426     }
1427   return NULL_RTX;
1428 }
1429
1430 /* A subroutine of extract_bit_field, with the same arguments.
1431    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1432    if we can find no other means of implementing the operation.
1433    If FALLBACK_P is false, return NULL instead.  */
1434
1435 static rtx
1436 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1437                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1438                      machine_mode mode, machine_mode tmode,
1439                      bool fallback_p)
1440 {
1441   rtx op0 = str_rtx;
1442   machine_mode int_mode;
1443   machine_mode mode1;
1444
1445   if (tmode == VOIDmode)
1446     tmode = mode;
1447
1448   while (GET_CODE (op0) == SUBREG)
1449     {
1450       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1451       op0 = SUBREG_REG (op0);
1452     }
1453
1454   /* If we have an out-of-bounds access to a register, just return an
1455      uninitialized register of the required mode.  This can occur if the
1456      source code contains an out-of-bounds access to a small array.  */
1457   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1458     return gen_reg_rtx (tmode);
1459
1460   if (REG_P (op0)
1461       && mode == GET_MODE (op0)
1462       && bitnum == 0
1463       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1464     {
1465       /* We're trying to extract a full register from itself.  */
1466       return op0;
1467     }
1468
1469   /* See if we can get a better vector mode before extracting.  */
1470   if (VECTOR_MODE_P (GET_MODE (op0))
1471       && !MEM_P (op0)
1472       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1473     {
1474       machine_mode new_mode;
1475
1476       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1477         new_mode = MIN_MODE_VECTOR_FLOAT;
1478       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1479         new_mode = MIN_MODE_VECTOR_FRACT;
1480       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1481         new_mode = MIN_MODE_VECTOR_UFRACT;
1482       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1483         new_mode = MIN_MODE_VECTOR_ACCUM;
1484       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1485         new_mode = MIN_MODE_VECTOR_UACCUM;
1486       else
1487         new_mode = MIN_MODE_VECTOR_INT;
1488
1489       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1490         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1491             && targetm.vector_mode_supported_p (new_mode))
1492           break;
1493       if (new_mode != VOIDmode)
1494         op0 = gen_lowpart (new_mode, op0);
1495     }
1496
1497   /* Use vec_extract patterns for extracting parts of vectors whenever
1498      available.  */
1499   if (VECTOR_MODE_P (GET_MODE (op0))
1500       && !MEM_P (op0)
1501       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1502       && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1503           == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1504     {
1505       struct expand_operand ops[3];
1506       machine_mode outermode = GET_MODE (op0);
1507       machine_mode innermode = GET_MODE_INNER (outermode);
1508       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1509       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1510
1511       create_output_operand (&ops[0], target, innermode);
1512       create_input_operand (&ops[1], op0, outermode);
1513       create_integer_operand (&ops[2], pos);
1514       if (maybe_expand_insn (icode, 3, ops))
1515         {
1516           target = ops[0].value;
1517           if (GET_MODE (target) != mode)
1518             return gen_lowpart (tmode, target);
1519           return target;
1520         }
1521     }
1522
1523   /* Make sure we are playing with integral modes.  Pun with subregs
1524      if we aren't.  */
1525   {
1526     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1527     if (imode != GET_MODE (op0))
1528       {
1529         if (MEM_P (op0))
1530           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1531         else if (imode != BLKmode)
1532           {
1533             op0 = gen_lowpart (imode, op0);
1534
1535             /* If we got a SUBREG, force it into a register since we
1536                aren't going to be able to do another SUBREG on it.  */
1537             if (GET_CODE (op0) == SUBREG)
1538               op0 = force_reg (imode, op0);
1539           }
1540         else if (REG_P (op0))
1541           {
1542             rtx reg, subreg;
1543             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1544                                             MODE_INT);
1545             reg = gen_reg_rtx (imode);
1546             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1547             emit_move_insn (subreg, op0);
1548             op0 = reg;
1549             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1550           }
1551         else
1552           {
1553             HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1554             rtx mem = assign_stack_temp (GET_MODE (op0), size);
1555             emit_move_insn (mem, op0);
1556             op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1557           }
1558       }
1559   }
1560
1561   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1562      If that's wrong, the solution is to test for it and set TARGET to 0
1563      if needed.  */
1564
1565   /* Get the mode of the field to use for atomic access or subreg
1566      conversion.  */
1567   mode1 = mode;
1568   if (SCALAR_INT_MODE_P (tmode))
1569     {
1570       machine_mode try_mode = mode_for_size (bitsize,
1571                                                   GET_MODE_CLASS (tmode), 0);
1572       if (try_mode != BLKmode)
1573         mode1 = try_mode;
1574     }
1575   gcc_assert (mode1 != BLKmode);
1576
1577   /* Extraction of a full MODE1 value can be done with a subreg as long
1578      as the least significant bit of the value is the least significant
1579      bit of either OP0 or a word of OP0.  */
1580   if (!MEM_P (op0)
1581       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1582       && bitsize == GET_MODE_BITSIZE (mode1)
1583       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1584     {
1585       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1586                                      bitnum / BITS_PER_UNIT);
1587       if (sub)
1588         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1589     }
1590
1591   /* Extraction of a full MODE1 value can be done with a load as long as
1592      the field is on a byte boundary and is sufficiently aligned.  */
1593   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1594     {
1595       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1596       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1597     }
1598
1599   /* Handle fields bigger than a word.  */
1600
1601   if (bitsize > BITS_PER_WORD)
1602     {
1603       /* Here we transfer the words of the field
1604          in the order least significant first.
1605          This is because the most significant word is the one which may
1606          be less than full.  */
1607
1608       unsigned int backwards = WORDS_BIG_ENDIAN;
1609       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1610       unsigned int i;
1611       rtx_insn *last;
1612
1613       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1614         target = gen_reg_rtx (mode);
1615
1616       /* In case we're about to clobber a base register or something
1617          (see gcc.c-torture/execute/20040625-1.c).   */
1618       if (reg_mentioned_p (target, str_rtx))
1619         target = gen_reg_rtx (mode);
1620
1621       /* Indicate for flow that the entire target reg is being set.  */
1622       emit_clobber (target);
1623
1624       last = get_last_insn ();
1625       for (i = 0; i < nwords; i++)
1626         {
1627           /* If I is 0, use the low-order word in both field and target;
1628              if I is 1, use the next to lowest word; and so on.  */
1629           /* Word number in TARGET to use.  */
1630           unsigned int wordnum
1631             = (backwards
1632                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1633                : i);
1634           /* Offset from start of field in OP0.  */
1635           unsigned int bit_offset = (backwards
1636                                      ? MAX ((int) bitsize - ((int) i + 1)
1637                                             * BITS_PER_WORD,
1638                                             0)
1639                                      : (int) i * BITS_PER_WORD);
1640           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1641           rtx result_part
1642             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1643                                              bitsize - i * BITS_PER_WORD),
1644                                    bitnum + bit_offset, 1, target_part,
1645                                    mode, word_mode, fallback_p);
1646
1647           gcc_assert (target_part);
1648           if (!result_part)
1649             {
1650               delete_insns_since (last);
1651               return NULL;
1652             }
1653
1654           if (result_part != target_part)
1655             emit_move_insn (target_part, result_part);
1656         }
1657
1658       if (unsignedp)
1659         {
1660           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1661              need to be zero'd out.  */
1662           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1663             {
1664               unsigned int i, total_words;
1665
1666               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1667               for (i = nwords; i < total_words; i++)
1668                 emit_move_insn
1669                   (operand_subword (target,
1670                                     backwards ? total_words - i - 1 : i,
1671                                     1, VOIDmode),
1672                    const0_rtx);
1673             }
1674           return target;
1675         }
1676
1677       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1678       target = expand_shift (LSHIFT_EXPR, mode, target,
1679                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1680       return expand_shift (RSHIFT_EXPR, mode, target,
1681                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1682     }
1683
1684   /* If OP0 is a multi-word register, narrow it to the affected word.
1685      If the region spans two words, defer to extract_split_bit_field.  */
1686   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1687     {
1688       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1689                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1690       bitnum %= BITS_PER_WORD;
1691       if (bitnum + bitsize > BITS_PER_WORD)
1692         {
1693           if (!fallback_p)
1694             return NULL_RTX;
1695           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1696           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1697         }
1698     }
1699
1700   /* From here on we know the desired field is smaller than a word.
1701      If OP0 is a register, it too fits within a word.  */
1702   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1703   extraction_insn extv;
1704   if (!MEM_P (op0)
1705       /* ??? We could limit the structure size to the part of OP0 that
1706          contains the field, with appropriate checks for endianness
1707          and TRULY_NOOP_TRUNCATION.  */
1708       && get_best_reg_extraction_insn (&extv, pattern,
1709                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1710                                        tmode))
1711     {
1712       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1713                                                  unsignedp, target, mode,
1714                                                  tmode);
1715       if (result)
1716         return result;
1717     }
1718
1719   /* If OP0 is a memory, try copying it to a register and seeing if a
1720      cheap register alternative is available.  */
1721   if (MEM_P (op0))
1722     {
1723       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1724                                         tmode))
1725         {
1726           rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1727                                                      bitnum, unsignedp,
1728                                                      target, mode,
1729                                                      tmode);
1730           if (result)
1731             return result;
1732         }
1733
1734       rtx_insn *last = get_last_insn ();
1735
1736       /* Try loading part of OP0 into a register and extracting the
1737          bitfield from that.  */
1738       unsigned HOST_WIDE_INT bitpos;
1739       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1740                                                0, 0, tmode, &bitpos);
1741       if (xop0)
1742         {
1743           xop0 = copy_to_reg (xop0);
1744           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1745                                             unsignedp, target,
1746                                             mode, tmode, false);
1747           if (result)
1748             return result;
1749           delete_insns_since (last);
1750         }
1751     }
1752
1753   if (!fallback_p)
1754     return NULL;
1755
1756   /* Find a correspondingly-sized integer field, so we can apply
1757      shifts and masks to it.  */
1758   int_mode = int_mode_for_mode (tmode);
1759   if (int_mode == BLKmode)
1760     int_mode = int_mode_for_mode (mode);
1761   /* Should probably push op0 out to memory and then do a load.  */
1762   gcc_assert (int_mode != BLKmode);
1763
1764   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1765                                     target, unsignedp);
1766   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1767 }
1768
1769 /* Generate code to extract a byte-field from STR_RTX
1770    containing BITSIZE bits, starting at BITNUM,
1771    and put it in TARGET if possible (if TARGET is nonzero).
1772    Regardless of TARGET, we return the rtx for where the value is placed.
1773
1774    STR_RTX is the structure containing the byte (a REG or MEM).
1775    UNSIGNEDP is nonzero if this is an unsigned bit field.
1776    MODE is the natural mode of the field value once extracted.
1777    TMODE is the mode the caller would like the value to have;
1778    but the value may be returned with type MODE instead.
1779
1780    If a TARGET is specified and we can store in it at no extra cost,
1781    we do so, and return TARGET.
1782    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1783    if they are equally easy.  */
1784
1785 rtx
1786 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1787                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1788                    machine_mode mode, machine_mode tmode)
1789 {
1790   machine_mode mode1;
1791
1792   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1793   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1794     mode1 = GET_MODE (str_rtx);
1795   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1796     mode1 = GET_MODE (target);
1797   else
1798     mode1 = tmode;
1799
1800   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1801     {
1802       /* Extraction of a full MODE1 value can be done with a simple load.
1803          We know here that the field can be accessed with one single
1804          instruction.  For targets that support unaligned memory,
1805          an unaligned access may be necessary.  */
1806       if (bitsize == GET_MODE_BITSIZE (mode1))
1807         {
1808           rtx result = adjust_bitfield_address (str_rtx, mode1,
1809                                                 bitnum / BITS_PER_UNIT);
1810           gcc_assert (bitnum % BITS_PER_UNIT == 0);
1811           return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1812         }
1813
1814       str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1815                                       &bitnum);
1816       gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1817       str_rtx = copy_to_reg (str_rtx);
1818     }
1819
1820   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1821                               target, mode, tmode, true);
1822 }
1823 \f
1824 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1825    from bit BITNUM of OP0.
1826
1827    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1828    If TARGET is nonzero, attempts to store the value there
1829    and return TARGET, but this is not guaranteed.
1830    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1831
1832 static rtx
1833 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1834                          unsigned HOST_WIDE_INT bitsize,
1835                          unsigned HOST_WIDE_INT bitnum, rtx target,
1836                          int unsignedp)
1837 {
1838   if (MEM_P (op0))
1839     {
1840       machine_mode mode
1841         = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1842                          MEM_VOLATILE_P (op0));
1843
1844       if (mode == VOIDmode)
1845         /* The only way this should occur is if the field spans word
1846            boundaries.  */
1847         return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1848
1849       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1850     }
1851
1852   return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1853                                     target, unsignedp);
1854 }
1855
1856 /* Helper function for extract_fixed_bit_field, extracts
1857    the bit field always using the MODE of OP0.  */
1858
1859 static rtx
1860 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1861                            unsigned HOST_WIDE_INT bitsize,
1862                            unsigned HOST_WIDE_INT bitnum, rtx target,
1863                            int unsignedp)
1864 {
1865   machine_mode mode = GET_MODE (op0);
1866   gcc_assert (SCALAR_INT_MODE_P (mode));
1867
1868   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1869      for invalid input, such as extract equivalent of f5 from
1870      gcc.dg/pr48335-2.c.  */
1871
1872   if (BYTES_BIG_ENDIAN)
1873     /* BITNUM is the distance between our msb and that of OP0.
1874        Convert it to the distance from the lsb.  */
1875     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1876
1877   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1878      We have reduced the big-endian case to the little-endian case.  */
1879
1880   if (unsignedp)
1881     {
1882       if (bitnum)
1883         {
1884           /* If the field does not already start at the lsb,
1885              shift it so it does.  */
1886           /* Maybe propagate the target for the shift.  */
1887           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1888           if (tmode != mode)
1889             subtarget = 0;
1890           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1891         }
1892       /* Convert the value to the desired mode.  */
1893       if (mode != tmode)
1894         op0 = convert_to_mode (tmode, op0, 1);
1895
1896       /* Unless the msb of the field used to be the msb when we shifted,
1897          mask out the upper bits.  */
1898
1899       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1900         return expand_binop (GET_MODE (op0), and_optab, op0,
1901                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1902                              target, 1, OPTAB_LIB_WIDEN);
1903       return op0;
1904     }
1905
1906   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1907      then arithmetic-shift its lsb to the lsb of the word.  */
1908   op0 = force_reg (mode, op0);
1909
1910   /* Find the narrowest integer mode that contains the field.  */
1911
1912   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1913        mode = GET_MODE_WIDER_MODE (mode))
1914     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1915       {
1916         op0 = convert_to_mode (mode, op0, 0);
1917         break;
1918       }
1919
1920   if (mode != tmode)
1921     target = 0;
1922
1923   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1924     {
1925       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1926       /* Maybe propagate the target for the shift.  */
1927       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1928       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1929     }
1930
1931   return expand_shift (RSHIFT_EXPR, mode, op0,
1932                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1933 }
1934
/* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
   VALUE << BITPOS.

   MODE is the machine mode passed on to immed_wide_int_const when the
   constant is created.
   VALUE is the unshifted value.
   BITPOS is the number of bit positions VALUE is shifted left by.

   The shift itself is done by wi::lshift rather than by the host "<<"
   operator.  NOTE(review): presumably this keeps bits that would be
   shifted beyond the width of a HOST_WIDE_INT; confirm against
   wide-int.h.  */

static rtx
lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
              int bitpos)
{
  return immed_wide_int_const (wi::lshift (value, bitpos), mode);
}
1944 \f
1945 /* Extract a bit field that is split across two words
1946    and return an RTX for the result.
1947
1948    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1949    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1950    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1951
1952 static rtx
1953 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1954                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1955 {
1956   unsigned int unit;
1957   unsigned int bitsdone = 0;
1958   rtx result = NULL_RTX;
1959   int first = 1;
1960
1961   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1962      much at a time.  */
1963   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1964     unit = BITS_PER_WORD;
1965   else
1966     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1967
1968   while (bitsdone < bitsize)
1969     {
1970       unsigned HOST_WIDE_INT thissize;
1971       rtx part, word;
1972       unsigned HOST_WIDE_INT thispos;
1973       unsigned HOST_WIDE_INT offset;
1974
1975       offset = (bitpos + bitsdone) / unit;
1976       thispos = (bitpos + bitsdone) % unit;
1977
1978       /* THISSIZE must not overrun a word boundary.  Otherwise,
1979          extract_fixed_bit_field will call us again, and we will mutually
1980          recurse forever.  */
1981       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1982       thissize = MIN (thissize, unit - thispos);
1983
1984       /* If OP0 is a register, then handle OFFSET here.
1985
1986          When handling multiword bitfields, extract_bit_field may pass
1987          down a word_mode SUBREG of a larger REG for a bitfield that actually
1988          crosses a word boundary.  Thus, for a SUBREG, we must find
1989          the current word starting from the base register.  */
1990       if (GET_CODE (op0) == SUBREG)
1991         {
1992           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1993           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1994                                         GET_MODE (SUBREG_REG (op0)));
1995           offset = 0;
1996         }
1997       else if (REG_P (op0))
1998         {
1999           word = operand_subword_force (op0, offset, GET_MODE (op0));
2000           offset = 0;
2001         }
2002       else
2003         word = op0;
2004
2005       /* Extract the parts in bit-counting order,
2006          whose meaning is determined by BYTES_PER_UNIT.
2007          OFFSET is in UNITs, and UNIT is in bits.  */
2008       part = extract_fixed_bit_field (word_mode, word, thissize,
2009                                       offset * unit + thispos, 0, 1);
2010       bitsdone += thissize;
2011
2012       /* Shift this part into place for the result.  */
2013       if (BYTES_BIG_ENDIAN)
2014         {
2015           if (bitsize != bitsdone)
2016             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2017                                  bitsize - bitsdone, 0, 1);
2018         }
2019       else
2020         {
2021           if (bitsdone != thissize)
2022             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2023                                  bitsdone - thissize, 0, 1);
2024         }
2025
2026       if (first)
2027         result = part;
2028       else
2029         /* Combine the parts with bitwise or.  This works
2030            because we extracted each part as an unsigned bit field.  */
2031         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2032                                OPTAB_LIB_WIDEN);
2033
2034       first = 0;
2035     }
2036
2037   /* Unsigned bit field: we are done.  */
2038   if (unsignedp)
2039     return result;
2040   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2041   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2042                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2043   return expand_shift (RSHIFT_EXPR, word_mode, result,
2044                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2045 }
2046 \f
2047 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2048    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2049    MODE, fill the upper bits with zeros.  Fail if the layout of either
2050    mode is unknown (as for CC modes) or if the extraction would involve
2051    unprofitable mode punning.  Return the value on success, otherwise
2052    return null.
2053
2054    This is different from gen_lowpart* in these respects:
2055
2056      - the returned value must always be considered an rvalue
2057
2058      - when MODE is wider than SRC_MODE, the extraction involves
2059        a zero extension
2060
2061      - when MODE is smaller than SRC_MODE, the extraction involves
2062        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2063
2064    In other words, this routine performs a computation, whereas the
2065    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2066    operations.  */
2067
2068 rtx
2069 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2070 {
2071   machine_mode int_mode, src_int_mode;
2072
2073   if (mode == src_mode)
2074     return src;
2075
2076   if (CONSTANT_P (src))
2077     {
2078       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2079          fails, it will happily create (subreg (symbol_ref)) or similar
2080          invalid SUBREGs.  */
2081       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2082       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2083       if (ret)
2084         return ret;
2085
2086       if (GET_MODE (src) == VOIDmode
2087           || !validate_subreg (mode, src_mode, src, byte))
2088         return NULL_RTX;
2089
2090       src = force_reg (GET_MODE (src), src);
2091       return gen_rtx_SUBREG (mode, src, byte);
2092     }
2093
2094   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2095     return NULL_RTX;
2096
2097   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2098       && MODES_TIEABLE_P (mode, src_mode))
2099     {
2100       rtx x = gen_lowpart_common (mode, src);
2101       if (x)
2102         return x;
2103     }
2104
2105   src_int_mode = int_mode_for_mode (src_mode);
2106   int_mode = int_mode_for_mode (mode);
2107   if (src_int_mode == BLKmode || int_mode == BLKmode)
2108     return NULL_RTX;
2109
2110   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2111     return NULL_RTX;
2112   if (!MODES_TIEABLE_P (int_mode, mode))
2113     return NULL_RTX;
2114
2115   src = gen_lowpart (src_int_mode, src);
2116   src = convert_modes (int_mode, src_int_mode, src, true);
2117   src = gen_lowpart (mode, src);
2118   return src;
2119 }
2120 \f
2121 /* Add INC into TARGET.  */
2122
2123 void
2124 expand_inc (rtx target, rtx inc)
2125 {
2126   rtx value = expand_binop (GET_MODE (target), add_optab,
2127                             target, inc,
2128                             target, 0, OPTAB_LIB_WIDEN);
2129   if (value != target)
2130     emit_move_insn (target, value);
2131 }
2132
2133 /* Subtract DEC from TARGET.  */
2134
2135 void
2136 expand_dec (rtx target, rtx dec)
2137 {
2138   rtx value = expand_binop (GET_MODE (target), sub_optab,
2139                             target, dec,
2140                             target, 0, OPTAB_LIB_WIDEN);
2141   if (value != target)
2142     emit_move_insn (target, value);
2143 }
2144 \f
2145 /* Output a shift instruction for expression code CODE,
2146    with SHIFTED being the rtx for the value to shift,
2147    and AMOUNT the rtx for the amount to shift by.
2148    Store the result in the rtx TARGET, if that is convenient.
2149    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2150    Return the rtx for where the value is.  */
2151
2152 static rtx
2153 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2154                 rtx amount, rtx target, int unsignedp)
2155 {
2156   rtx op1, temp = 0;
2157   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2158   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2159   optab lshift_optab = ashl_optab;
2160   optab rshift_arith_optab = ashr_optab;
2161   optab rshift_uns_optab = lshr_optab;
2162   optab lrotate_optab = rotl_optab;
2163   optab rrotate_optab = rotr_optab;
2164   machine_mode op1_mode;
2165   machine_mode scalar_mode = mode;
2166   int attempt;
2167   bool speed = optimize_insn_for_speed_p ();
2168
2169   if (VECTOR_MODE_P (mode))
2170     scalar_mode = GET_MODE_INNER (mode);
2171   op1 = amount;
2172   op1_mode = GET_MODE (op1);
2173
2174   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2175      shift amount is a vector, use the vector/vector shift patterns.  */
2176   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2177     {
2178       lshift_optab = vashl_optab;
2179       rshift_arith_optab = vashr_optab;
2180       rshift_uns_optab = vlshr_optab;
2181       lrotate_optab = vrotl_optab;
2182       rrotate_optab = vrotr_optab;
2183     }
2184
2185   /* Previously detected shift-counts computed by NEGATE_EXPR
2186      and shifted in the other direction; but that does not work
2187      on all machines.  */
2188
2189   if (SHIFT_COUNT_TRUNCATED)
2190     {
2191       if (CONST_INT_P (op1)
2192           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2193               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2194         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2195                        % GET_MODE_BITSIZE (scalar_mode));
2196       else if (GET_CODE (op1) == SUBREG
2197                && subreg_lowpart_p (op1)
2198                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2199                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2200         op1 = SUBREG_REG (op1);
2201     }
2202
2203   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2204      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2205      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2206      amount instead.  */
2207   if (rotate
2208       && CONST_INT_P (op1)
2209       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2210                    GET_MODE_BITSIZE (scalar_mode) - 1))
2211     {
2212       op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2213       left = !left;
2214       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2215     }
2216
2217   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2218      Note that this is not the case for bigger values.  For instance a rotation
2219      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2220      0x04030201 (bswapsi).  */
2221   if (rotate
2222       && CONST_INT_P (op1)
2223       && INTVAL (op1) == BITS_PER_UNIT
2224       && GET_MODE_SIZE (scalar_mode) == 2
2225       && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2226     return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2227                                   unsignedp);
2228
2229   if (op1 == const0_rtx)
2230     return shifted;
2231
2232   /* Check whether its cheaper to implement a left shift by a constant
2233      bit count by a sequence of additions.  */
2234   if (code == LSHIFT_EXPR
2235       && CONST_INT_P (op1)
2236       && INTVAL (op1) > 0
2237       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2238       && INTVAL (op1) < MAX_BITS_PER_WORD
2239       && (shift_cost (speed, mode, INTVAL (op1))
2240           > INTVAL (op1) * add_cost (speed, mode))
2241       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2242     {
2243       int i;
2244       for (i = 0; i < INTVAL (op1); i++)
2245         {
2246           temp = force_reg (mode, shifted);
2247           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2248                                   unsignedp, OPTAB_LIB_WIDEN);
2249         }
2250       return shifted;
2251     }
2252
2253   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2254     {
2255       enum optab_methods methods;
2256
2257       if (attempt == 0)
2258         methods = OPTAB_DIRECT;
2259       else if (attempt == 1)
2260         methods = OPTAB_WIDEN;
2261       else
2262         methods = OPTAB_LIB_WIDEN;
2263
2264       if (rotate)
2265         {
2266           /* Widening does not work for rotation.  */
2267           if (methods == OPTAB_WIDEN)
2268             continue;
2269           else if (methods == OPTAB_LIB_WIDEN)
2270             {
2271               /* If we have been unable to open-code this by a rotation,
2272                  do it as the IOR of two shifts.  I.e., to rotate A
2273                  by N bits, compute
2274                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2275                  where C is the bitsize of A.
2276
2277                  It is theoretically possible that the target machine might
2278                  not be able to perform either shift and hence we would
2279                  be making two libcalls rather than just the one for the
2280                  shift (similarly if IOR could not be done).  We will allow
2281                  this extremely unlikely lossage to avoid complicating the
2282                  code below.  */
2283
2284               rtx subtarget = target == shifted ? 0 : target;
2285               rtx new_amount, other_amount;
2286               rtx temp1;
2287
2288               new_amount = op1;
2289               if (op1 == const0_rtx)
2290                 return shifted;
2291               else if (CONST_INT_P (op1))
2292                 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2293                                         - INTVAL (op1));
2294               else
2295                 {
2296                   other_amount
2297                     = simplify_gen_unary (NEG, GET_MODE (op1),
2298                                           op1, GET_MODE (op1));
2299                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2300                   other_amount
2301                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2302                                            gen_int_mode (mask, GET_MODE (op1)));
2303                 }
2304
2305               shifted = force_reg (mode, shifted);
2306
2307               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2308                                      mode, shifted, new_amount, 0, 1);
2309               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2310                                       mode, shifted, other_amount,
2311                                       subtarget, 1);
2312               return expand_binop (mode, ior_optab, temp, temp1, target,
2313                                    unsignedp, methods);
2314             }
2315
2316           temp = expand_binop (mode,
2317                                left ? lrotate_optab : rrotate_optab,
2318                                shifted, op1, target, unsignedp, methods);
2319         }
2320       else if (unsignedp)
2321         temp = expand_binop (mode,
2322                              left ? lshift_optab : rshift_uns_optab,
2323                              shifted, op1, target, unsignedp, methods);
2324
2325       /* Do arithmetic shifts.
2326          Also, if we are going to widen the operand, we can just as well
2327          use an arithmetic right-shift instead of a logical one.  */
2328       if (temp == 0 && ! rotate
2329           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2330         {
2331           enum optab_methods methods1 = methods;
2332
2333           /* If trying to widen a log shift to an arithmetic shift,
2334              don't accept an arithmetic shift of the same size.  */
2335           if (unsignedp)
2336             methods1 = OPTAB_MUST_WIDEN;
2337
2338           /* Arithmetic shift */
2339
2340           temp = expand_binop (mode,
2341                                left ? lshift_optab : rshift_arith_optab,
2342                                shifted, op1, target, unsignedp, methods1);
2343         }
2344
2345       /* We used to try extzv here for logical right shifts, but that was
2346          only useful for one machine, the VAX, and caused poor code
2347          generation there for lshrdi3, so the code was deleted and a
2348          define_expand for lshrsi3 was added to vax.md.  */
2349     }
2350
2351   gcc_assert (temp);
2352   return temp;
2353 }
2354
2355 /* Emit a shift instruction for expression code CODE that shifts
2356    SHIFTED, a value of mode MODE, by the constant bit count AMOUNT.
2357    AMOUNT is first wrapped in a constant rtx with GEN_INT.
2358    The result is stored in the rtx TARGET, if that is convenient.
2359    A nonzero UNSIGNEDP requests a logical shift, zero an arithmetic one.
2360    The return value is the rtx for where the result lives.  */
2361
2362 rtx
2363 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2364               int amount, rtx target, int unsignedp)
2365 {
2366   rtx shift_count = GEN_INT (amount);
2367   return expand_shift_1 (code, mode, shifted, shift_count, target, unsignedp);
2368 }
2369
2370 /* Emit a shift instruction for expression code CODE that shifts
2371    SHIFTED, a value of mode MODE, by the bit count given as tree AMOUNT.
2372    AMOUNT is first expanded to an rtx with expand_normal.
2373    The result is stored in the rtx TARGET, if that is convenient.
2374    A nonzero UNSIGNEDP requests a logical shift, zero an arithmetic one.
2375    The return value is the rtx for where the result lives.  */
2376
2377 rtx
2378 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2379                        tree amount, rtx target, int unsignedp)
2380 {
2381   rtx shift_count = expand_normal (amount);
2382   return expand_shift_1 (code, mode, shifted, shift_count, target, unsignedp);
2383 }
2384
2385 \f
2386 /* Indicates the type of fixup needed after a constant multiplication.
2387    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2388    the result should be negated, and ADD_VARIANT means that the
2389    multiplicand should be added to the result.  */
2390 enum mult_variant {basic_variant, negate_variant, add_variant};
2391 /* Forward declarations of file-local (static) helper functions.  */
2392 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2393                         const struct mult_cost *, machine_mode mode);
2394 static bool choose_mult_variant (machine_mode, HOST_WIDE_INT,
2395                                  struct algorithm *, enum mult_variant *, int);
2396 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2397                               const struct algorithm *, enum mult_variant);
2398 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2399 static rtx extract_high_half (machine_mode, rtx);
2400 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2401 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2402                                        int, int);
2403 /* Compute the best algorithm for multiplying by T and store it in
2404    *ALG_OUT.  The algorithm must cost less than COST_LIMIT.
2405    If ALG_OUT->cost >= COST_LIMIT on return, no algorithm was found and
2406    all other fields of *ALG_OUT are undefined.
2407    MODE is the machine mode of the multiplication.  */
2408
2409 static void
2410 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2411             const struct mult_cost *cost_limit, machine_mode mode)
2412 {
2413   int m;
2414   struct algorithm *alg_in, *best_alg;
2415   struct mult_cost best_cost;
2416   struct mult_cost new_limit;
2417   int op_cost, op_latency;
2418   unsigned HOST_WIDE_INT orig_t = t;
2419   unsigned HOST_WIDE_INT q;
2420   int maxm, hash_index;
2421   bool cache_hit = false;
2422   enum alg_code cache_alg = alg_zero;
2423   bool speed = optimize_insn_for_speed_p ();
2424   machine_mode imode;
2425   struct alg_hash_entry *entry_ptr;
2426
2427   /* Indicate that no algorithm is yet found.  If no algorithm
2428      is found, this value will be returned and indicate failure.  */
2429   alg_out->cost.cost = cost_limit->cost + 1;
2430   alg_out->cost.latency = cost_limit->latency + 1;
2431   /* A limit that is negative, or zero in both fields, cannot be met.  */
2432   if (cost_limit->cost < 0
2433       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2434     return;
2435
2436   /* Be prepared for vector modes.  */
2437   imode = GET_MODE_INNER (mode);
2438   /* MAXM is the exclusive upper bound for every shift count M below.  */
2439   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2440
2441   /* Restrict the bits of "t" to the multiplication's mode.  */
2442   t &= GET_MODE_MASK (imode);
2443
2444   /* t == 1 can be done in zero cost.  */
2445   if (t == 1)
2446     {
2447       alg_out->ops = 1;
2448       alg_out->cost.cost = 0;
2449       alg_out->cost.latency = 0;
2450       alg_out->op[0] = alg_m;
2451       return;
2452     }
2453
2454   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2455      fail now.  */
2456   if (t == 0)
2457     {
2458       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2459         return;
2460       else
2461         {
2462           alg_out->ops = 1;
2463           alg_out->cost.cost = zero_cost (speed);
2464           alg_out->cost.latency = zero_cost (speed);
2465           alg_out->op[0] = alg_zero;
2466           return;
2467         }
2468     }
2469
2470   /* We'll be needing a couple extra algorithm structures now.  */
2471
2472   alg_in = XALLOCA (struct algorithm);
2473   best_alg = XALLOCA (struct algorithm);
2474   best_cost = *cost_limit;
2475
2476   /* Compute the hash index from the masked T, MODE and SPEED.  */
2477   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2478
2479   /* See if we already know what to do for T.  */
2480   entry_ptr = alg_hash_entry_ptr (hash_index);
2481   if (entry_ptr->t == t
2482       && entry_ptr->mode == mode
2483       && entry_ptr->mode == mode  /* Repeats the previous test.  */
2484       && entry_ptr->speed == speed
2485       && entry_ptr->alg != alg_unknown)
2486     {
2487       cache_alg = entry_ptr->alg;
2488
2489       if (cache_alg == alg_impossible)
2490         {
2491           /* The cache tells us that it's impossible to synthesize
2492              multiplication by T within entry_ptr->cost.  */
2493           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2494             /* COST_LIMIT is at least as restrictive as the one
2495                recorded in the hash table, in which case we have no
2496                hope of synthesizing a multiplication.  Just
2497                return.  */
2498             return;
2499
2500           /* If we get here, COST_LIMIT is less restrictive than the
2501              one recorded in the hash table, so we may be able to
2502              synthesize a multiplication.  Proceed as if we didn't
2503              have the cache entry.  */
2504         }
2505       else
2506         {
2507           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2508             /* The cached algorithm shows that this multiplication
2509                requires more cost than COST_LIMIT.  Just return.  This
2510                way, we don't clobber this cache entry with
2511                alg_impossible but retain useful information.  */
2512             return;
2513
2514           cache_hit = true;
2515           /* Jump straight to the strategy that produced the cached step.  */
2516           switch (cache_alg)
2517             {
2518             case alg_shift:
2519               goto do_alg_shift;
2520
2521             case alg_add_t_m2:
2522             case alg_sub_t_m2:
2523               goto do_alg_addsub_t_m2;
2524
2525             case alg_add_factor:
2526             case alg_sub_factor:
2527               goto do_alg_addsub_factor;
2528
2529             case alg_add_t2_m:
2530               goto do_alg_add_t2_m;
2531
2532             case alg_sub_t2_m:
2533               goto do_alg_sub_t2_m;
2534
2535             default:
2536               gcc_unreachable ();
2537             }
2538         }
2539     }
2540
2541   /* If we have a group of zero bits at the low-order part of T, try
2542      multiplying by the remaining bits and then doing a shift.  */
2543
2544   if ((t & 1) == 0)
2545     {
2546     do_alg_shift:
2547       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2548       if (m < maxm)
2549         {
2550           q = t >> m;
2551           /* The function expand_shift will choose between a shift and
2552              a sequence of additions, so the observed cost is given as
2553              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2554           op_cost = m * add_cost (speed, mode);
2555           if (shift_cost (speed, mode, m) < op_cost)
2556             op_cost = shift_cost (speed, mode, m);
2557           new_limit.cost = best_cost.cost - op_cost;
2558           new_limit.latency = best_cost.latency - op_cost;
2559           synth_mult (alg_in, q, &new_limit, mode);
2560           /* Add this step's cost back before comparing with the best.  */
2561           alg_in->cost.cost += op_cost;
2562           alg_in->cost.latency += op_cost;
2563           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2564             {
2565               best_cost = alg_in->cost;
2566               std::swap (alg_in, best_alg);
2567               best_alg->log[best_alg->ops] = m;
2568               best_alg->op[best_alg->ops] = alg_shift;
2569             }
2570
2571           /* See if treating ORIG_T as a signed number yields a better
2572              sequence.  Try this sequence only for a negative ORIG_T
2573              as it would be useless for a non-negative ORIG_T.  */
2574           if ((HOST_WIDE_INT) orig_t < 0)
2575             {
2576               /* Shift ORIG_T as follows because a right shift of a
2577                  negative-valued signed type is implementation
2578                  defined.  */
2579               q = ~(~orig_t >> m);
2580               /* The function expand_shift will choose between a shift
2581                  and a sequence of additions, so the observed cost is
2582                  given as MIN (m * add_cost(speed, mode),
2583                  shift_cost(speed, mode, m)).  */
2584               op_cost = m * add_cost (speed, mode);
2585               if (shift_cost (speed, mode, m) < op_cost)
2586                 op_cost = shift_cost (speed, mode, m);
2587               new_limit.cost = best_cost.cost - op_cost;
2588               new_limit.latency = best_cost.latency - op_cost;
2589               synth_mult (alg_in, q, &new_limit, mode);
2590
2591               alg_in->cost.cost += op_cost;
2592               alg_in->cost.latency += op_cost;
2593               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2594                 {
2595                   best_cost = alg_in->cost;
2596                   std::swap (alg_in, best_alg);
2597                   best_alg->log[best_alg->ops] = m;
2598                   best_alg->op[best_alg->ops] = alg_shift;
2599                 }
2600             }
2601         }
2602       if (cache_hit)
2603         goto done;
2604     }
2605
2606   /* If we have an odd number, add or subtract one.  */
2607   if ((t & 1) != 0)
2608     {
2609       unsigned HOST_WIDE_INT w;
2610
2611     do_alg_addsub_t_m2:
2612       for (w = 1; (w & t) != 0; w <<= 1)
2613         ;
2614       /* If T was -1, then W will be zero after the loop.  This is another
2615          case where T ends with ...111.  Handling this with (T + 1) and
2616          subtract 1 produces slightly better code and results in algorithm
2617          selection much faster than treating it like the ...0111 case
2618          below.  */
2619       if (w == 0
2620           || (w > 2
2621               /* Reject the case where t is 3.
2622                  Thus we prefer addition in that case.  */
2623               && t != 3))
2624         {
2625           /* T ends with ...111.  Use x * (T + 1) - x.  */
2626
2627           op_cost = add_cost (speed, mode);
2628           new_limit.cost = best_cost.cost - op_cost;
2629           new_limit.latency = best_cost.latency - op_cost;
2630           synth_mult (alg_in, t + 1, &new_limit, mode);
2631
2632           alg_in->cost.cost += op_cost;
2633           alg_in->cost.latency += op_cost;
2634           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2635             {
2636               best_cost = alg_in->cost;
2637               std::swap (alg_in, best_alg);
2638               best_alg->log[best_alg->ops] = 0;
2639               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2640             }
2641         }
2642       else
2643         {
2644           /* T ends with ...01 or ...011.  Use x * (T - 1) + x.  */
2645
2646           op_cost = add_cost (speed, mode);
2647           new_limit.cost = best_cost.cost - op_cost;
2648           new_limit.latency = best_cost.latency - op_cost;
2649           synth_mult (alg_in, t - 1, &new_limit, mode);
2650
2651           alg_in->cost.cost += op_cost;
2652           alg_in->cost.latency += op_cost;
2653           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2654             {
2655               best_cost = alg_in->cost;
2656               std::swap (alg_in, best_alg);
2657               best_alg->log[best_alg->ops] = 0;
2658               best_alg->op[best_alg->ops] = alg_add_t_m2;
2659             }
2660         }
2661
2662       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2663          quickly with a - a * n for some appropriate constant n.  */
2664       m = exact_log2 (-orig_t + 1);
2665       if (m >= 0 && m < maxm)
2666         {
2667           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2668           /* If the target has a cheap shift-and-subtract insn use
2669              that in preference to a shift insn followed by a sub insn.
2670              Assume that the shift-and-sub is "atomic" with a latency
2671              equal to its cost, otherwise assume that on superscalar
2672              hardware the shift may be executed concurrently with the
2673              earlier steps in the algorithm.  */
2674           if (shiftsub1_cost (speed, mode, m) <= op_cost)
2675             {
2676               op_cost = shiftsub1_cost (speed, mode, m);
2677               op_latency = op_cost;
2678             }
2679           else
2680             op_latency = add_cost (speed, mode);
2681
2682           new_limit.cost = best_cost.cost - op_cost;
2683           new_limit.latency = best_cost.latency - op_latency;
2684           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2685                       &new_limit, mode);
2686
2687           alg_in->cost.cost += op_cost;
2688           alg_in->cost.latency += op_latency;
2689           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2690             {
2691               best_cost = alg_in->cost;
2692               std::swap (alg_in, best_alg);
2693               best_alg->log[best_alg->ops] = m;
2694               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2695             }
2696         }
2697
2698       if (cache_hit)
2699         goto done;
2700     }
2701
2702   /* Look for factors of t of the form
2703      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2704      If we find such a factor, we can multiply by t using an algorithm that
2705      multiplies by q, shift the result by m and add/subtract it to itself.
2706
2707      We search for large factors first and loop down, even if large factors
2708      are less probable than small; if we find a large factor we will find a
2709      good sequence quickly, and therefore be able to prune (by decreasing
2710      COST_LIMIT) the search.  */
2711
2712  do_alg_addsub_factor:
2713   for (m = floor_log2 (t - 1); m >= 2; m--)
2714     {
2715       unsigned HOST_WIDE_INT d;
2716       /* First try the factor 2**m + 1.  */
2717       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2718       if (t % d == 0 && t > d && m < maxm
2719           && (!cache_hit || cache_alg == alg_add_factor))
2720         {
2721           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2722           if (shiftadd_cost (speed, mode, m) <= op_cost)
2723             op_cost = shiftadd_cost (speed, mode, m);
2724           /* The latency of this step is taken to be its whole cost.  */
2725           op_latency = op_cost;
2726
2727
2728           new_limit.cost = best_cost.cost - op_cost;
2729           new_limit.latency = best_cost.latency - op_latency;
2730           synth_mult (alg_in, t / d, &new_limit, mode);
2731
2732           alg_in->cost.cost += op_cost;
2733           alg_in->cost.latency += op_latency;
2734           if (alg_in->cost.latency < op_cost)
2735             alg_in->cost.latency = op_cost;
2736           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2737             {
2738               best_cost = alg_in->cost;
2739               std::swap (alg_in, best_alg);
2740               best_alg->log[best_alg->ops] = m;
2741               best_alg->op[best_alg->ops] = alg_add_factor;
2742             }
2743           /* Other factors will have been taken care of in the recursion.  */
2744           break;
2745         }
2746       /* Then try the factor 2**m - 1.  */
2747       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2748       if (t % d == 0 && t > d && m < maxm
2749           && (!cache_hit || cache_alg == alg_sub_factor))
2750         {
2751           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2752           if (shiftsub0_cost (speed, mode, m) <= op_cost)
2753             op_cost = shiftsub0_cost (speed, mode, m);
2754
2755           op_latency = op_cost;
2756
2757           new_limit.cost = best_cost.cost - op_cost;
2758           new_limit.latency = best_cost.latency - op_latency;
2759           synth_mult (alg_in, t / d, &new_limit, mode);
2760
2761           alg_in->cost.cost += op_cost;
2762           alg_in->cost.latency += op_latency;
2763           if (alg_in->cost.latency < op_cost)
2764             alg_in->cost.latency = op_cost;
2765           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2766             {
2767               best_cost = alg_in->cost;
2768               std::swap (alg_in, best_alg);
2769               best_alg->log[best_alg->ops] = m;
2770               best_alg->op[best_alg->ops] = alg_sub_factor;
2771             }
2772           break;
2773         }
2774     }
2775   if (cache_hit)
2776     goto done;
2777
2778   /* Try shift-and-add (load effective address) instructions,
2779      i.e. do a*3, a*5, a*9.  */
2780   if ((t & 1) != 0)
2781     {
2782     do_alg_add_t2_m:
2783       q = t - 1;
2784       q = q & -q;
2785       m = exact_log2 (q);
2786       if (m >= 0 && m < maxm)
2787         {
2788           op_cost = shiftadd_cost (speed, mode, m);
2789           new_limit.cost = best_cost.cost - op_cost;
2790           new_limit.latency = best_cost.latency - op_cost;
2791           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2792
2793           alg_in->cost.cost += op_cost;
2794           alg_in->cost.latency += op_cost;
2795           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2796             {
2797               best_cost = alg_in->cost;
2798               std::swap (alg_in, best_alg);
2799               best_alg->log[best_alg->ops] = m;
2800               best_alg->op[best_alg->ops] = alg_add_t2_m;
2801             }
2802         }
2803       if (cache_hit)
2804         goto done;
2805       /* Likewise with shift-and-subtract, using the low bit of T + 1.  */
2806     do_alg_sub_t2_m:
2807       q = t + 1;
2808       q = q & -q;
2809       m = exact_log2 (q);
2810       if (m >= 0 && m < maxm)
2811         {
2812           op_cost = shiftsub0_cost (speed, mode, m);
2813           new_limit.cost = best_cost.cost - op_cost;
2814           new_limit.latency = best_cost.latency - op_cost;
2815           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2816
2817           alg_in->cost.cost += op_cost;
2818           alg_in->cost.latency += op_cost;
2819           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2820             {
2821               best_cost = alg_in->cost;
2822               std::swap (alg_in, best_alg);
2823               best_alg->log[best_alg->ops] = m;
2824               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2825             }
2826         }
2827       if (cache_hit)
2828         goto done;
2829     }
2830
2831  done:
2832   /* If best_cost has not decreased, we have not found any algorithm.  */
2833   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2834     {
2835       /* We failed to find an algorithm.  Record alg_impossible for
2836          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2837          we are asked to find an algorithm for T within the same or
2838          lower COST_LIMIT, we can immediately return to the
2839          caller.  */
2840       entry_ptr->t = t;
2841       entry_ptr->mode = mode;
2842       entry_ptr->speed = speed;
2843       entry_ptr->alg = alg_impossible;
2844       entry_ptr->cost = *cost_limit;
2845       return;
2846     }
2847
2848   /* Cache the result: the last step chosen and the total cost.  */
2849   if (!cache_hit)
2850     {
2851       entry_ptr->t = t;
2852       entry_ptr->mode = mode;
2853       entry_ptr->speed = speed;
2854       entry_ptr->alg = best_alg->op[best_alg->ops];
2855       entry_ptr->cost.cost = best_cost.cost;
2856       entry_ptr->cost.latency = best_cost.latency;
2857     }
2858
2859   /* If we are getting a too long sequence for `struct algorithm'
2860      to record, make this search fail.  */
2861   if (best_alg->ops == MAX_BITS_PER_WORD)
2862     return;
2863   /* BEST_ALG->ops indexes the step added at this level, hence the + 1.  */
2864   /* Copy the algorithm from temporary space to the space at alg_out.
2865      We avoid using structure assignment because the majority of
2866      best_alg is normally undefined, and this is a critical function.  */
2867   alg_out->ops = best_alg->ops + 1;
2868   alg_out->cost = best_cost;
2869   memcpy (alg_out->op, best_alg->op,
2870           alg_out->ops * sizeof *alg_out->op);
2871   memcpy (alg_out->log, best_alg->log,
2872           alg_out->ops * sizeof *alg_out->log);
2873 }
2874 \f
2875 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2876    Try three variations:
2877
2878        - a shift/add sequence based on VAL itself
2879        - a shift/add sequence based on -VAL, followed by a negation
2880        - a shift/add sequence based on VAL - 1, followed by an addition.
2881
2882    Return true if the cheapest of these costs less than MULT_COST,
2883    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2884
2885 static bool
2886 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2887                      struct algorithm *alg, enum mult_variant *variant,
2888                      int mult_cost)
2889 {
2890   struct algorithm alg2;
2891   struct mult_cost limit;
2892   int op_cost;
2893   bool speed = optimize_insn_for_speed_p ();
2894
2895   /* Fail quickly for impossible bounds.  */
2896   if (mult_cost < 0)
2897     return false;
2898
2899   /* Ensure that mult_cost provides a reasonable upper bound.
2900      Any constant multiplication can be performed with less
2901      than 2 * bits additions.  */
2902   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2903   if (mult_cost > op_cost)
2904     mult_cost = op_cost;
2905   /* Variant 1: synthesize VAL directly.  */
2906   *variant = basic_variant;
2907   limit.cost = mult_cost;
2908   limit.latency = mult_cost;
2909   synth_mult (alg, val, &limit, mode);
2910
2911   /* Variant 2: -VAL then negate; works only if the inverted value fits
2912      in an `unsigned int'.  Negate as unsigned so it cannot overflow.  */
2913   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2914     {
2915       op_cost = neg_cost (speed, mode);
2916       if (MULT_COST_LESS (&alg->cost, mult_cost))
2917         {
2918           limit.cost = alg->cost.cost - op_cost;
2919           limit.latency = alg->cost.latency - op_cost;
2920         }
2921       else
2922         {
2923           limit.cost = mult_cost - op_cost;
2924           limit.latency = mult_cost - op_cost;
2925         }
2926       /* Search only for sequences that still win after the negation.  */
2927       synth_mult (&alg2, -(unsigned HOST_WIDE_INT) val, &limit, mode);
2928       alg2.cost.cost += op_cost;
2929       alg2.cost.latency += op_cost;
2930       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2931         *alg = alg2, *variant = negate_variant;
2932     }
2933
2934   /* Variant 3: VAL - 1 plus an add; very useful for division-by-constant.  */
2935   op_cost = add_cost (speed, mode);
2936   if (MULT_COST_LESS (&alg->cost, mult_cost))
2937     {
2938       limit.cost = alg->cost.cost - op_cost;
2939       limit.latency = alg->cost.latency - op_cost;
2940     }
2941   else
2942     {
2943       limit.cost = mult_cost - op_cost;
2944       limit.latency = mult_cost - op_cost;
2945     }
2946   /* Subtract as unsigned: VAL - 1 must wrap rather than overflow.  */
2947   synth_mult (&alg2, (unsigned HOST_WIDE_INT) val - 1, &limit, mode);
2948   alg2.cost.cost += op_cost;
2949   alg2.cost.latency += op_cost;
2950   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2951     *alg = alg2, *variant = add_variant;
2952
2953   return MULT_COST_LESS (&alg->cost, mult_cost);
2954 }
2955
2956 /* A subroutine of expand_mult, used for constant multiplications.
2957    Multiply OP0 by VAL in mode MODE using the shift/add sequence ALG
2958    and the final fixup VARIANT, storing the result in TARGET if that is
2959    convenient.  Return the rtx holding the product.  */
2960
2961 static rtx
2962 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
2963                    rtx target, const struct algorithm *alg,
2964                    enum mult_variant variant)
2965 {
2966   unsigned HOST_WIDE_INT val_so_far;  /* Unsigned: updates must wrap.  */
2967   rtx_insn *insn;
2968   rtx accum, tem;
2969   int opno;
2970   machine_mode nmode;
2971
2972   /* Avoid referencing memory over and over and invalid sharing
2973      on SUBREGs.  */
2974   op0 = force_reg (mode, op0);
2975
2976   /* ACCUM starts out either as OP0 or as a zero, depending on
2977      the first operation.  VAL_SO_FAR tracks the multiplier reached.  */
2978
2979   if (alg->op[0] == alg_zero)
2980     {
2981       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2982       val_so_far = 0;
2983     }
2984   else if (alg->op[0] == alg_m)
2985     {
2986       accum = copy_to_mode_reg (mode, op0);
2987       val_so_far = 1;
2988     }
2989   else
2990     gcc_unreachable ();
2991   /* Apply the remaining steps in order, mirroring each in VAL_SO_FAR.  */
2992   for (opno = 1; opno < alg->ops; opno++)
2993     {
2994       int log = alg->log[opno];
2995       rtx shift_subtarget = optimize ? 0 : accum;
2996       rtx add_target
2997         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2998            && !optimize)
2999           ? target : 0;
3000       rtx accum_target = optimize ? 0 : accum;
3001       rtx accum_inner;
3002
3003       switch (alg->op[opno])
3004         {
3005         case alg_shift:  /* accum = accum << log  */
3006           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3007           /* REG_EQUAL note will be attached to the following insn.  */
3008           emit_move_insn (accum, tem);
3009           val_so_far <<= log;
3010           break;
3011
3012         case alg_add_t_m2:  /* accum = accum + (op0 << log)  */
3013           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3014           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3015                                  add_target ? add_target : accum_target);
3016           val_so_far += (unsigned HOST_WIDE_INT) 1 << log;
3017           break;
3018
3019         case alg_sub_t_m2:  /* accum = accum - (op0 << log)  */
3020           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3021           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3022                                  add_target ? add_target : accum_target);
3023           val_so_far -= (unsigned HOST_WIDE_INT) 1 << log;
3024           break;
3025
3026         case alg_add_t2_m:  /* accum = (accum << log) + op0  */
3027           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3028                                 log, shift_subtarget, 0);
3029           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3030                                  add_target ? add_target : accum_target);
3031           val_so_far = (val_so_far << log) + 1;
3032           break;
3033
3034         case alg_sub_t2_m:  /* accum = (accum << log) - op0  */
3035           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3036                                 log, shift_subtarget, 0);
3037           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3038                                  add_target ? add_target : accum_target);
3039           val_so_far = (val_so_far << log) - 1;
3040           break;
3041
3042         case alg_add_factor:  /* accum = accum + (accum << log)  */
3043           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3044           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3045                                  add_target ? add_target : accum_target);
3046           val_so_far += val_so_far << log;
3047           break;
3048
3049         case alg_sub_factor:  /* accum = (accum << log) - accum  */
3050           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3051           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3052                                  (add_target
3053                                   ? add_target : (optimize ? 0 : tem)));
3054           val_so_far = (val_so_far << log) - val_so_far;
3055           break;
3056
3057         default:
3058           gcc_unreachable ();
3059         }
3060
3061       if (SCALAR_INT_MODE_P (mode))
3062         {
3063           /* Write a REG_EQUAL note on the last insn so that we can cse
3064              multiplication sequences.  Note that if ACCUM is a SUBREG,
3065              we've set the inner register and must properly indicate that.  */
3066           tem = op0, nmode = mode;
3067           accum_inner = accum;
3068           if (GET_CODE (accum) == SUBREG)
3069             {
3070               accum_inner = SUBREG_REG (accum);
3071               nmode = GET_MODE (accum_inner);
3072               tem = gen_lowpart (nmode, op0);
3073             }
3074
3075           insn = get_last_insn ();
3076           set_dst_reg_note (insn, REG_EQUAL,
3077                             gen_rtx_MULT (nmode, tem,
3078                                           gen_int_mode (val_so_far, nmode)),
3079                             accum_inner);
3080         }
3081     }
3082   /* Apply the final fixup requested by VARIANT, if any.  */
3083   if (variant == negate_variant)
3084     {
3085       val_so_far = -val_so_far;
3086       accum = expand_unop (mode, neg_optab, accum, target, 0);
3087     }
3088   else if (variant == add_variant)
3089     {
3090       val_so_far = val_so_far + 1;
3091       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3092     }
3093
3094   /* Compare only the bits of val and val_so_far that are significant
3095      in the result mode, to avoid sign-/zero-extension confusion.  */
3096   nmode = GET_MODE_INNER (mode);
3097   val &= GET_MODE_MASK (nmode);
3098   val_so_far &= GET_MODE_MASK (nmode);
3099   gcc_assert ((unsigned HOST_WIDE_INT) val == val_so_far);
3100
3101   return accum;
3102 }
3103
/* Perform a multiplication and return an rtx for the result.
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
   TARGET is a suggestion for where to store the result (an rtx).
   UNSIGNEDP is forwarded to the shift and binop expanders; when it is
   zero, MODE is a scalar integer mode and flag_trapv is set, the
   trapping optabs (negv_optab, smulv_optab) are used and no shift/add
   sequence is synthesized.

   We check specially for a constant integer as OP1.  A constant OP0 is
   swapped into OP1 on entry, so callers need not order the operands
   themselves.  */

rtx
expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
             int unsignedp)
{
  enum mult_variant variant;
  struct algorithm algorithm;
  rtx scalar_op1;
  int max_cost;
  bool speed = optimize_insn_for_speed_p ();
  /* True when overflow-trapping arithmetic was requested for a signed
     scalar integer multiply.  */
  bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;

  /* Keep any constant operand in OP1; the checks below only look there.  */
  if (CONSTANT_P (op0))
    std::swap (op0, op1);

  /* For vectors, there are several simplifications that can be made if
     all elements of the vector constant are identical.  */
  scalar_op1 = unwrap_const_vec_duplicate (op1);

  if (INTEGRAL_MODE_P (mode))
    {
      rtx fake_reg;
      HOST_WIDE_INT coeff;
      bool is_neg;
      int mode_bitsize;

      /* Multiplying by 0, 1 or -1 needs no multiply at all.  */
      if (op1 == CONST0_RTX (mode))
        return op1;
      if (op1 == CONST1_RTX (mode))
        return op0;
      if (op1 == CONSTM1_RTX (mode))
        return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
                            op0, target, 0);

      /* The shift/add sequences below are built from non-trapping
         operations, so leave trapping multiplies to smulv_optab.  */
      if (do_trapv)
        goto skip_synth;

      /* If mode is integer vector mode, check if the backend supports
         vector lshift (by scalar or vector) at all.  If not, we can't use
         synthesized multiply.  */
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
          && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
          && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
        goto skip_synth;

      /* These are the operations that are potentially turned into
         a sequence of shifts and additions.  */
      mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

      /* synth_mult does an `unsigned int' multiply.  As long as the mode is
         less than or equal in size to `unsigned int' this doesn't matter.
         If the mode is larger than `unsigned int', then synth_mult works
         only if the constant value exactly fits in an `unsigned int' without
         any truncation.  This means that multiplying by negative values does
         not work; results are off by 2^32 on a 32 bit machine.  */
      if (CONST_INT_P (scalar_op1))
        {
          coeff = INTVAL (scalar_op1);
          is_neg = coeff < 0;
        }
      /* A wider integer constant is only handled when it is an exact
         power of two, in which case a single shift suffices.  */
#if TARGET_SUPPORTS_WIDE_INT
      else if (CONST_WIDE_INT_P (scalar_op1))
#else
      else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
#endif
        {
          int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
          /* Perfect power of 2 (other than 1, which is handled above).  */
          if (shift > 0)
            return expand_shift (LSHIFT_EXPR, mode, op0,
                                 shift, target, unsignedp);
          else
            goto skip_synth;
        }
      else
        goto skip_synth;

      /* We used to test optimize here, on the grounds that it's better to
         produce a smaller program when -O is not used.  But this causes
         such a terrible slowdown sometimes that it seems better to always
         use synth_mult.  */

      /* Special case powers of two.  A negative COEFF in a mode wider
         than HOST_WIDE_INT is excluded here and handled by the
         negate-afterwards path below.  */
      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
          && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
        return expand_shift (LSHIFT_EXPR, mode, op0,
                             floor_log2 (coeff), target, unsignedp);

      /* FAKE_REG is only ever used as an operand of the MULT rtx that is
         handed to set_src_cost; no insn refers to it.  */
      fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);

      /* Attempt to handle multiplication of DImode values by negative
         coefficients, by performing the multiplication by a positive
         multiplier and then inverting the result.  */
      if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
        {
          /* It's safe to use -coeff even for INT_MIN, as the
             result is interpreted as an unsigned coefficient.
             Exclude cost of op0 from max_cost to match the cost
             calculation of the synth_mult.  */
          coeff = -(unsigned HOST_WIDE_INT) coeff;
          /* The final negation is paid for out of the multiply budget.  */
          max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
                                    mode, speed)
                      - neg_cost (speed, mode));
          if (max_cost <= 0)
            goto skip_synth;

          /* Special case powers of two.  */
          if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
            {
              rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
                                       floor_log2 (coeff), target, unsignedp);
              return expand_unop (mode, neg_optab, temp, target, 0);
            }

          if (choose_mult_variant (mode, coeff, &algorithm, &variant,
                                   max_cost))
            {
              rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
                                            &algorithm, variant);
              return expand_unop (mode, neg_optab, temp, target, 0);
            }
          goto skip_synth;
        }

      /* Exclude cost of op0 from max_cost to match the cost
         calculation of the synth_mult.  */
      max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
      if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
        return expand_mult_const (mode, op0, coeff, target,
                                  &algorithm, variant);
    }
 skip_synth:

  /* Expand x*2.0 as x+x.  */
  if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
      && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
    {
      op0 = force_reg (GET_MODE (op0), op0);
      return expand_binop (mode, add_optab, op0, op0,
                           target, unsignedp, OPTAB_LIB_WIDEN);
    }

  /* This used to use umul_optab if unsigned, but for non-widening multiply
     there is no difference between signed and unsigned.  */
  op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
                      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
  /* With OPTAB_LIB_WIDEN the expansion is expected to always succeed.  */
  gcc_assert (op0);
  return op0;
}
3260
3261 /* Return a cost estimate for multiplying a register by the given
3262    COEFFicient in the given MODE and SPEED.  */
3263
3264 int
3265 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3266 {
3267   int max_cost;
3268   struct algorithm algorithm;
3269   enum mult_variant variant;
3270
3271   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3272   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3273                            mode, speed);
3274   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3275     return algorithm.cost.cost;
3276   else
3277     return max_cost;
3278 }
3279
3280 /* Perform a widening multiplication and return an rtx for the result.
3281    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3282    TARGET is a suggestion for where to store the result (an rtx).
3283    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3284    or smul_widen_optab.
3285
3286    We check specially for a constant integer as OP1, comparing the
3287    cost of a widening multiply against the cost of a sequence of shifts
3288    and adds.  */
3289
3290 rtx
3291 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3292                       int unsignedp, optab this_optab)
3293 {
3294   bool speed = optimize_insn_for_speed_p ();
3295   rtx cop1;
3296
3297   if (CONST_INT_P (op1)
3298       && GET_MODE (op0) != VOIDmode
3299       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3300                                 this_optab == umul_widen_optab))
3301       && CONST_INT_P (cop1)
3302       && (INTVAL (cop1) >= 0
3303           || HWI_COMPUTABLE_MODE_P (mode)))
3304     {
3305       HOST_WIDE_INT coeff = INTVAL (cop1);
3306       int max_cost;
3307       enum mult_variant variant;
3308       struct algorithm algorithm;
3309
3310       if (coeff == 0)
3311         return CONST0_RTX (mode);
3312
3313       /* Special case powers of two.  */
3314       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3315         {
3316           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3317           return expand_shift (LSHIFT_EXPR, mode, op0,
3318                                floor_log2 (coeff), target, unsignedp);
3319         }
3320
3321       /* Exclude cost of op0 from max_cost to match the cost
3322          calculation of the synth_mult.  */
3323       max_cost = mul_widen_cost (speed, mode);
3324       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3325                                max_cost))
3326         {
3327           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3328           return expand_mult_const (mode, op0, coeff, target,
3329                                     &algorithm, variant);
3330         }
3331     }
3332   return expand_binop (mode, this_optab, op0, op1, target,
3333                        unsignedp, OPTAB_LIB_WIDEN);
3334 }
3335 \f
3336 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3337    replace division by D, and put the least significant N bits of the result
3338    in *MULTIPLIER_PTR and return the most significant bit.
3339
3340    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3341    needed precision is in PRECISION (should be <= N).
3342
3343    PRECISION should be as small as possible so this function can choose
3344    multiplier more freely.
3345
3346    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3347    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3348
3349    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3350    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3351
3352 unsigned HOST_WIDE_INT
3353 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3354                    unsigned HOST_WIDE_INT *multiplier_ptr,
3355                    int *post_shift_ptr, int *lgup_ptr)
3356 {
3357   int lgup, post_shift;
3358   int pow, pow2;
3359
3360   /* lgup = ceil(log2(divisor)); */
3361   lgup = ceil_log2 (d);
3362
3363   gcc_assert (lgup <= n);
3364
3365   pow = n + lgup;
3366   pow2 = n + lgup - precision;
3367
3368   /* mlow = 2^(N + lgup)/d */
3369   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3370   wide_int mlow = wi::udiv_trunc (val, d);
3371
3372   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3373   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3374   wide_int mhigh = wi::udiv_trunc (val, d);
3375
3376   /* If precision == N, then mlow, mhigh exceed 2^N
3377      (but they do not exceed 2^(N+1)).  */
3378
3379   /* Reduce to lowest terms.  */
3380   for (post_shift = lgup; post_shift > 0; post_shift--)
3381     {
3382       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3383                                                        HOST_BITS_PER_WIDE_INT);
3384       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3385                                                        HOST_BITS_PER_WIDE_INT);
3386       if (ml_lo >= mh_lo)
3387         break;
3388
3389       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3390       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3391     }
3392
3393   *post_shift_ptr = post_shift;
3394   *lgup_ptr = lgup;
3395   if (n < HOST_BITS_PER_WIDE_INT)
3396     {
3397       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3398       *multiplier_ptr = mhigh.to_uhwi () & mask;
3399       return mhigh.to_uhwi () >= mask;
3400     }
3401   else
3402     {
3403       *multiplier_ptr = mhigh.to_uhwi ();
3404       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3405     }
3406 }
3407
3408 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3409    congruent to 1 (mod 2**N).  */
3410
3411 static unsigned HOST_WIDE_INT
3412 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3413 {
3414   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3415
3416   /* The algorithm notes that the choice y = x satisfies
3417      x*y == 1 mod 2^3, since x is assumed odd.
3418      Each iteration doubles the number of bits of significance in y.  */
3419
3420   unsigned HOST_WIDE_INT mask;
3421   unsigned HOST_WIDE_INT y = x;
3422   int nbit = 3;
3423
3424   mask = (n == HOST_BITS_PER_WIDE_INT
3425           ? ~(unsigned HOST_WIDE_INT) 0
3426           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3427
3428   while (nbit < n)
3429     {
3430       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3431       nbit *= 2;
3432     }
3433   return y;
3434 }
3435
3436 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3437    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3438    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3439    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3440    become signed.
3441
3442    The result is put in TARGET if that is convenient.
3443
3444    MODE is the mode of operation.  */
3445
3446 rtx
3447 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3448                              rtx op1, rtx target, int unsignedp)
3449 {
3450   rtx tem;
3451   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3452
3453   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3454                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3455   tem = expand_and (mode, tem, op1, NULL_RTX);
3456   adj_operand
3457     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3458                      adj_operand);
3459
3460   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3461                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3462   tem = expand_and (mode, tem, op0, NULL_RTX);
3463   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3464                           target);
3465
3466   return target;
3467 }
3468
3469 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3470
3471 static rtx
3472 extract_high_half (machine_mode mode, rtx op)
3473 {
3474   machine_mode wider_mode;
3475
3476   if (mode == word_mode)
3477     return gen_highpart (mode, op);
3478
3479   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3480
3481   wider_mode = GET_MODE_WIDER_MODE (mode);
3482   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3483                      GET_MODE_BITSIZE (mode), 0, 1);
3484   return convert_modes (mode, wider_mode, op, 0);
3485 }
3486
/* Like expmed_mult_highpart, but only consider using a multiplication
   optab.  OP1 is an rtx for the constant operand.

   The strategies below are tried in order, each only if its estimated
   cost is strictly below MAX_COST:
     1. a highpart multiply of the requested signedness;
     2. a highpart multiply of the opposite signedness plus adjustment;
     3. a widening multiply of the requested signedness;
     4. a plain multiply in the wider mode;
     5. a widening multiply of the opposite signedness plus adjustment.
   Returns the rtx holding the high part, or 0 if no strategy worked.  */

static rtx
expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
                            rtx target, int unsignedp, int max_cost)
{
  /* OP1's value re-expressed as a constant of MODE.  */
  rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
  machine_mode wider_mode;
  optab moptab;
  rtx tem;
  int size;
  bool speed = optimize_insn_for_speed_p ();

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));

  wider_mode = GET_MODE_WIDER_MODE (mode);
  size = GET_MODE_BITSIZE (mode);

  /* Firstly, try using a multiplication insn that only generates the needed
     high part of the product, and in the sign flavor of unsignedp.  */
  if (mul_highpart_cost (speed, mode) < max_cost)
    {
      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
                          unsignedp, OPTAB_DIRECT);
      if (tem)
        return tem;
    }

  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
     Need to adjust the result after the multiplication.  The cost test
     budgets two shifts and four adds on top of the multiply for that
     adjustment.  */
  if (size - 1 < BITS_PER_WORD
      && (mul_highpart_cost (speed, mode)
          + 2 * shift_cost (speed, mode, size-1)
          + 4 * add_cost (speed, mode) < max_cost))
    {
      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
                          unsignedp, OPTAB_DIRECT);
      if (tem)
        /* We used the wrong signedness.  Adjust the result.  */
        return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
                                            tem, unsignedp);
    }

  /* Try widening multiplication.  */
  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && mul_widen_cost (speed, wider_mode) < max_cost)
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
                          unsignedp, OPTAB_WIDEN);
      if (tem)
        return extract_high_half (mode, tem);
    }

  /* Try widening the mode and perform a non-widening multiplication.  */
  if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
          < max_cost))
    {
      rtx_insn *insns;
      rtx wop0, wop1;

      /* We need to widen the operands, for example to ensure the
         constant multiplier is correctly sign or zero extended.
         Use a sequence to clean-up any instructions emitted by
         the conversions if things don't work out.  */
      start_sequence ();
      wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
      wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
      tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
                          unsignedp, OPTAB_WIDEN);
      insns = get_insns ();
      end_sequence ();

      /* Only commit the collected insns if the multiply was expanded.  */
      if (tem)
        {
          emit_insn (insns);
          return extract_high_half (mode, tem);
        }
    }

  /* Try widening multiplication of opposite signedness, and adjust.  */
  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_widen_cost (speed, wider_mode)
          + 2 * shift_cost (speed, mode, size-1)
          + 4 * add_cost (speed, mode) < max_cost))
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
                          NULL_RTX, ! unsignedp, OPTAB_WIDEN);
      if (tem != 0)
        {
          tem = extract_high_half (mode, tem);
          /* We used the wrong signedness.  Adjust the result.  */
          return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
                                              target, unsignedp);
        }
    }

  /* No strategy was both available and cheap enough.  */
  return 0;
}
3593
3594 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3595    putting the high half of the result in TARGET if that is convenient,
3596    and return where the result is.  If the operation can not be performed,
3597    0 is returned.
3598
3599    MODE is the mode of operation and result.
3600
3601    UNSIGNEDP nonzero means unsigned multiply.
3602
3603    MAX_COST is the total allowed cost for the expanded RTL.  */
3604
3605 static rtx
3606 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3607                       rtx target, int unsignedp, int max_cost)
3608 {
3609   machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3610   unsigned HOST_WIDE_INT cnst1;
3611   int extra_cost;
3612   bool sign_adjust = false;
3613   enum mult_variant variant;
3614   struct algorithm alg;
3615   rtx tem;
3616   bool speed = optimize_insn_for_speed_p ();
3617
3618   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3619   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3620   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3621
3622   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3623
3624   /* We can't optimize modes wider than BITS_PER_WORD.
3625      ??? We might be able to perform double-word arithmetic if
3626      mode == word_mode, however all the cost calculations in
3627      synth_mult etc. assume single-word operations.  */
3628   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3629     return expmed_mult_highpart_optab (mode, op0, op1, target,
3630                                        unsignedp, max_cost);
3631
3632   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3633
3634   /* Check whether we try to multiply by a negative constant.  */
3635   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3636     {
3637       sign_adjust = true;
3638       extra_cost += add_cost (speed, mode);
3639     }
3640
3641   /* See whether shift/add multiplication is cheap enough.  */
3642   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3643                            max_cost - extra_cost))
3644     {
3645       /* See whether the specialized multiplication optabs are
3646          cheaper than the shift/add version.  */
3647       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3648                                         alg.cost.cost + extra_cost);
3649       if (tem)
3650         return tem;
3651
3652       tem = convert_to_mode (wider_mode, op0, unsignedp);
3653       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3654       tem = extract_high_half (mode, tem);
3655
3656       /* Adjust result for signedness.  */
3657       if (sign_adjust)
3658         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3659
3660       return tem;
3661     }
3662   return expmed_mult_highpart_optab (mode, op0, op1, target,
3663                                      unsignedp, max_cost);
3664 }
3665
3666
/* Expand signed modulus of OP0 by a power of two D in mode MODE and
   return an rtx holding the remainder.

   When branches are costly (BRANCH_COST >= 2) and we optimize for
   speed, a branch-free sequence built around a store-flag of OP0 < 0 is
   used, provided emit_store_flag succeeds.  Otherwise the remainder is
   computed with a mask, a compare-and-jump and a fix-up that is only
   executed for negative intermediate results.  */

static rtx
expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx result, temp, shift;
  rtx_code_label *label;
  int logd;
  int prec = GET_MODE_PRECISION (mode);

  logd = floor_log2 (d);
  result = gen_reg_rtx (mode);

  /* Avoid conditional branches when they're expensive.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
      && optimize_insn_for_speed_p ())
    {
      /* emit_store_flag may fail; in that case fall through to the
         branching sequence below.  */
      rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
                                      mode, 0, -1);
      if (signmask)
        {
          /* The low LOGD bits, i.e. D - 1.  */
          HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
          signmask = force_reg (mode, signmask);
          shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);

          /* Use the rtx_cost of a LSHIFTRT instruction to determine
             which instruction sequence to use.  If logical right shifts
             are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
             use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */

          /* TEMP is built here only so that its cost can be queried.  */
          temp = gen_rtx_LSHIFTRT (mode, result, shift);
          if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
              || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
                  > COSTS_N_INSNS (2)))
            {
              /* ((((op0 ^ signmask) - signmask) & masklow) ^ signmask)
                 - signmask.  */
              temp = expand_binop (mode, xor_optab, op0, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, sub_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, and_optab, temp,
                                   gen_int_mode (masklow, mode),
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, xor_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, sub_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
            }
          else
            {
              /* Shift SIGNMASK right logically so that only its low
                 LOGD bits remain, then compute
                 ((op0 + signmask) & masklow) - signmask.  */
              signmask = expand_binop (mode, lshr_optab, signmask, shift,
                                       NULL_RTX, 1, OPTAB_LIB_WIDEN);
              signmask = force_reg (mode, signmask);

              temp = expand_binop (mode, add_optab, op0, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, and_optab, temp,
                                   gen_int_mode (masklow, mode),
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, sub_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
            }
          return temp;
        }
    }

  /* Mask contains the mode's signbit and the significant bits of the
     modulus.  By including the signbit in the operation, many targets
     can avoid an explicit compare operation in the following comparison
     against zero.  */
  wide_int mask = wi::mask (logd, false, prec);
  mask = wi::set_bit (mask, prec - 1);

  temp = expand_binop (mode, and_optab, op0,
                       immed_wide_int_const (mask, mode),
                       result, 1, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);

  /* A non-negative masked value is already the remainder; skip the
     fix-up below.  */
  label = gen_label_rtx ();
  do_cmp_and_jump (result, const0_rtx, GE, mode, label);

  /* Fix-up for a negative value: result = ((result - 1) | mask) + 1.
     NOTE(review): wi::mask with a true second argument presumably
     yields the complement of the low LOGD bits -- confirm against
     wide-int.h.  */
  temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
                       0, OPTAB_LIB_WIDEN);

  mask = wi::mask (logd, true, prec);
  temp = expand_binop (mode, ior_optab, temp,
                       immed_wide_int_const (mask, mode),
                       result, 1, OPTAB_LIB_WIDEN);
  temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
                       0, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);
  emit_label (label);
  return result;
}
3762
3763 /* Expand signed division of OP0 by a power of two D in mode MODE.
3764    This routine is only called for positive values of D.  */
3765
3766 static rtx
3767 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3768 {
3769   rtx temp;
3770   rtx_code_label *label;
3771   int logd;
3772
3773   logd = floor_log2 (d);
3774
3775   if (d == 2
3776       && BRANCH_COST (optimize_insn_for_speed_p (),
3777                       false) >= 1)
3778     {
3779       temp = gen_reg_rtx (mode);
3780       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3781       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3782                            0, OPTAB_LIB_WIDEN);
3783       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3784     }
3785
3786   if (HAVE_conditional_move
3787       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3788     {
3789       rtx temp2;
3790
3791       start_sequence ();
3792       temp2 = copy_to_mode_reg (mode, op0);
3793       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3794                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3795       temp = force_reg (mode, temp);
3796
3797       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3798       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3799                                      mode, temp, temp2, mode, 0);
3800       if (temp2)
3801         {
3802           rtx_insn *seq = get_insns ();
3803           end_sequence ();
3804           emit_insn (seq);
3805           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3806         }
3807       end_sequence ();
3808     }
3809
3810   if (BRANCH_COST (optimize_insn_for_speed_p (),
3811                    false) >= 2)
3812     {
3813       int ushift = GET_MODE_BITSIZE (mode) - logd;
3814
3815       temp = gen_reg_rtx (mode);
3816       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3817       if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3818           || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3819              > COSTS_N_INSNS (1))
3820         temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3821                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3822       else
3823         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3824                              ushift, NULL_RTX, 1);
3825       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3826                            0, OPTAB_LIB_WIDEN);
3827       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3828     }
3829
3830   label = gen_label_rtx ();
3831   temp = copy_to_mode_reg (mode, op0);
3832   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3833   expand_inc (temp, gen_int_mode (d - 1, mode));
3834   emit_label (label);
3835   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3836 }
3837 \f
3838 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3839    if that is convenient, and returning where the result is.
3840    You may request either the quotient or the remainder as the result;
3841    specify REM_FLAG nonzero to get the remainder.
3842
3843    CODE is the expression code for which kind of division this is;
3844    it controls how rounding is done.  MODE is the machine mode to use.
3845    UNSIGNEDP nonzero means do unsigned division.  */
3846
3847 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3848    and then correct it by or'ing in missing high bits
3849    if result of ANDI is nonzero.
3850    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3851    This could optimize to a bfexts instruction.
3852    But C doesn't use these operations, so their optimizations are
3853    left for later.  */
3854 /* ??? For modulo, we don't actually need the highpart of the first product,
3855    the low part will do nicely.  And for small divisors, the second multiply
3856    can also be a low-part only multiply or even be completely left out.
3857    E.g. to calculate the remainder of a division by 3 with a 32 bit
3858    multiply, multiply with 0x55555556 and extract the upper two bits;
3859    the result is exact for inputs up to 0x1fffffff.
3860    The input range can be reduced by using cross-sum rules.
3861    For odd divisors >= 3, the following table gives right shift counts
3862    so that if a number is shifted by an integer multiple of the given
3863    amount, the remainder stays the same:
3864    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3865    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3866    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3867    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3868    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3869
3870    Cross-sum rules for even numbers can be derived by leaving as many bits
3871    to the right alone as the divisor has zeros to the right.
3872    E.g. if x is an unsigned 32 bit number:
3873    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3874    */
3875
3876 rtx
3877 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3878                rtx op0, rtx op1, rtx target, int unsignedp)
3879 {
3880   machine_mode compute_mode;
3881   rtx tquotient;
3882   rtx quotient = 0, remainder = 0;
3883   rtx_insn *last;
3884   int size;
3885   rtx_insn *insn;
3886   optab optab1, optab2;
3887   int op1_is_constant, op1_is_pow2 = 0;
3888   int max_cost, extra_cost;
3889   static HOST_WIDE_INT last_div_const = 0;
3890   bool speed = optimize_insn_for_speed_p ();
3891
3892   op1_is_constant = CONST_INT_P (op1);
3893   if (op1_is_constant)
3894     {
3895       unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3896       if (unsignedp)
3897         ext_op1 &= GET_MODE_MASK (mode);
3898       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3899                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3900     }
3901
3902   /*
3903      This is the structure of expand_divmod:
3904
3905      First comes code to fix up the operands so we can perform the operations
3906      correctly and efficiently.
3907
3908      Second comes a switch statement with code specific for each rounding mode.
3909      For some special operands this code emits all RTL for the desired
3910      operation, for other cases, it generates only a quotient and stores it in
3911      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3912      to indicate that it has not done anything.
3913
3914      Last comes code that finishes the operation.  If QUOTIENT is set and
3915      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3916      QUOTIENT is not set, it is computed using trunc rounding.
3917
3918      We try to generate special code for division and remainder when OP1 is a
3919      constant.  If |OP1| = 2**n we can use shifts and some other fast
3920      operations.  For other values of OP1, we compute a carefully selected
3921      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3922      by m.
3923
3924      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3925      half of the product.  Different strategies for generating the product are
3926      implemented in expmed_mult_highpart.
3927
3928      If what we actually want is the remainder, we generate that by another
3929      by-constant multiplication and a subtraction.  */
3930
3931   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3932      code below will malfunction if we are, so check here and handle
3933      the special case if so.  */
3934   if (op1 == const1_rtx)
3935     return rem_flag ? const0_rtx : op0;
3936
3937     /* When dividing by -1, we could get an overflow.
3938      negv_optab can handle overflows.  */
3939   if (! unsignedp && op1 == constm1_rtx)
3940     {
3941       if (rem_flag)
3942         return const0_rtx;
3943       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3944                           ? negv_optab : neg_optab, op0, target, 0);
3945     }
3946
3947   if (target
3948       /* Don't use the function value register as a target
3949          since we have to read it as well as write it,
3950          and function-inlining gets confused by this.  */
3951       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3952           /* Don't clobber an operand while doing a multi-step calculation.  */
3953           || ((rem_flag || op1_is_constant)
3954               && (reg_mentioned_p (target, op0)
3955                   || (MEM_P (op0) && MEM_P (target))))
3956           || reg_mentioned_p (target, op1)
3957           || (MEM_P (op1) && MEM_P (target))))
3958     target = 0;
3959
3960   /* Get the mode in which to perform this computation.  Normally it will
3961      be MODE, but sometimes we can't do the desired operation in MODE.
3962      If so, pick a wider mode in which we can do the operation.  Convert
3963      to that mode at the start to avoid repeated conversions.
3964
3965      First see what operations we need.  These depend on the expression
3966      we are evaluating.  (We assume that divxx3 insns exist under the
3967      same conditions that modxx3 insns and that these insns don't normally
3968      fail.  If these assumptions are not correct, we may generate less
3969      efficient code in some cases.)
3970
3971      Then see if we find a mode in which we can open-code that operation
3972      (either a division, modulus, or shift).  Finally, check for the smallest
3973      mode for which we can do the operation with a library call.  */
3974
3975   /* We might want to refine this now that we have division-by-constant
3976      optimization.  Since expmed_mult_highpart tries so many variants, it is
3977      not straightforward to generalize this.  Maybe we should make an array
3978      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3979
3980   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3981             ? (unsignedp ? lshr_optab : ashr_optab)
3982             : (unsignedp ? udiv_optab : sdiv_optab));
3983   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3984             ? optab1
3985             : (unsignedp ? udivmod_optab : sdivmod_optab));
3986
3987   for (compute_mode = mode; compute_mode != VOIDmode;
3988        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3989     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3990         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3991       break;
3992
3993   if (compute_mode == VOIDmode)
3994     for (compute_mode = mode; compute_mode != VOIDmode;
3995          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3996       if (optab_libfunc (optab1, compute_mode)
3997           || optab_libfunc (optab2, compute_mode))
3998         break;
3999
4000   /* If we still couldn't find a mode, use MODE, but expand_binop will
4001      probably die.  */
4002   if (compute_mode == VOIDmode)
4003     compute_mode = mode;
4004
4005   if (target && GET_MODE (target) == compute_mode)
4006     tquotient = target;
4007   else
4008     tquotient = gen_reg_rtx (compute_mode);
4009
4010   size = GET_MODE_BITSIZE (compute_mode);
4011 #if 0
4012   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4013      (mode), and thereby get better code when OP1 is a constant.  Do that
4014      later.  It will require going over all usages of SIZE below.  */
4015   size = GET_MODE_BITSIZE (mode);
4016 #endif
4017
4018   /* Only deduct something for a REM if the last divide done was
4019      for a different constant.   Then set the constant of the last
4020      divide.  */
4021   max_cost = (unsignedp
4022               ? udiv_cost (speed, compute_mode)
4023               : sdiv_cost (speed, compute_mode));
4024   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4025                      && INTVAL (op1) == last_div_const))
4026     max_cost -= (mul_cost (speed, compute_mode)
4027                  + add_cost (speed, compute_mode));
4028
4029   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4030
4031   /* Now convert to the best mode to use.  */
4032   if (compute_mode != mode)
4033     {
4034       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4035       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4036
4037       /* convert_modes may have placed op1 into a register, so we
4038          must recompute the following.  */
4039       op1_is_constant = CONST_INT_P (op1);
4040       op1_is_pow2 = (op1_is_constant
4041                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4042                           || (! unsignedp
4043                               && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4044     }
4045
4046   /* If one of the operands is a volatile MEM, copy it into a register.  */
4047
4048   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4049     op0 = force_reg (compute_mode, op0);
4050   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4051     op1 = force_reg (compute_mode, op1);
4052
4053   /* If we need the remainder or if OP1 is constant, we need to
4054      put OP0 in a register in case it has any queued subexpressions.  */
4055   if (rem_flag || op1_is_constant)
4056     op0 = force_reg (compute_mode, op0);
4057
4058   last = get_last_insn ();
4059
4060   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4061   if (unsignedp)
4062     {
4063       if (code == FLOOR_DIV_EXPR)
4064         code = TRUNC_DIV_EXPR;
4065       if (code == FLOOR_MOD_EXPR)
4066         code = TRUNC_MOD_EXPR;
4067       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4068         code = TRUNC_DIV_EXPR;
4069     }
4070
4071   if (op1 != const0_rtx)
4072     switch (code)
4073       {
4074       case TRUNC_MOD_EXPR:
4075       case TRUNC_DIV_EXPR:
4076         if (op1_is_constant)
4077           {
4078             if (unsignedp)
4079               {
4080                 unsigned HOST_WIDE_INT mh, ml;
4081                 int pre_shift, post_shift;
4082                 int dummy;
4083                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4084                                             & GET_MODE_MASK (compute_mode));
4085
4086                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4087                   {
4088                     pre_shift = floor_log2 (d);
4089                     if (rem_flag)
4090                       {
4091                         unsigned HOST_WIDE_INT mask
4092                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4093                         remainder
4094                           = expand_binop (compute_mode, and_optab, op0,
4095                                           gen_int_mode (mask, compute_mode),
4096                                           remainder, 1,
4097                                           OPTAB_LIB_WIDEN);
4098                         if (remainder)
4099                           return gen_lowpart (mode, remainder);
4100                       }
4101                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4102                                              pre_shift, tquotient, 1);
4103                   }
4104                 else if (size <= HOST_BITS_PER_WIDE_INT)
4105                   {
4106                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4107                       {
4108                         /* Most significant bit of divisor is set; emit an scc
4109                            insn.  */
4110                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4111                                                           compute_mode, 1, 1);
4112                       }
4113                     else
4114                       {
4115                         /* Find a suitable multiplier and right shift count
4116                            instead of multiplying with D.  */
4117
4118                         mh = choose_multiplier (d, size, size,
4119                                                 &ml, &post_shift, &dummy);
4120
4121                         /* If the suggested multiplier is more than SIZE bits,
4122                            we can do better for even divisors, using an
4123                            initial right shift.  */
4124                         if (mh != 0 && (d & 1) == 0)
4125                           {
4126                             pre_shift = floor_log2 (d & -d);
4127                             mh = choose_multiplier (d >> pre_shift, size,
4128                                                     size - pre_shift,
4129                                                     &ml, &post_shift, &dummy);
4130                             gcc_assert (!mh);
4131                           }
4132                         else
4133                           pre_shift = 0;
4134
4135                         if (mh != 0)
4136                           {
4137                             rtx t1, t2, t3, t4;
4138
4139                             if (post_shift - 1 >= BITS_PER_WORD)
4140                               goto fail1;
4141
4142                             extra_cost
4143                               = (shift_cost (speed, compute_mode, post_shift - 1)
4144                                  + shift_cost (speed, compute_mode, 1)
4145                                  + 2 * add_cost (speed, compute_mode));
4146                             t1 = expmed_mult_highpart
4147                               (compute_mode, op0,
4148                                gen_int_mode (ml, compute_mode),
4149                                NULL_RTX, 1, max_cost - extra_cost);
4150                             if (t1 == 0)
4151                               goto fail1;
4152                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4153                                                                op0, t1),
4154                                                 NULL_RTX);
4155                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4156                                                t2, 1, NULL_RTX, 1);
4157                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4158                                                               t1, t3),
4159                                                 NULL_RTX);
4160                             quotient = expand_shift
4161                               (RSHIFT_EXPR, compute_mode, t4,
4162                                post_shift - 1, tquotient, 1);
4163                           }
4164                         else
4165                           {
4166                             rtx t1, t2;
4167
4168                             if (pre_shift >= BITS_PER_WORD
4169                                 || post_shift >= BITS_PER_WORD)
4170                               goto fail1;
4171
4172                             t1 = expand_shift
4173                               (RSHIFT_EXPR, compute_mode, op0,
4174                                pre_shift, NULL_RTX, 1);
4175                             extra_cost
4176                               = (shift_cost (speed, compute_mode, pre_shift)
4177                                  + shift_cost (speed, compute_mode, post_shift));
4178                             t2 = expmed_mult_highpart
4179                               (compute_mode, t1,
4180                                gen_int_mode (ml, compute_mode),
4181                                NULL_RTX, 1, max_cost - extra_cost);
4182                             if (t2 == 0)
4183                               goto fail1;
4184                             quotient = expand_shift
4185                               (RSHIFT_EXPR, compute_mode, t2,
4186                                post_shift, tquotient, 1);
4187                           }
4188                       }
4189                   }
4190                 else            /* Too wide mode to use tricky code */
4191                   break;
4192
4193                 insn = get_last_insn ();
4194                 if (insn != last)
4195                   set_dst_reg_note (insn, REG_EQUAL,
4196                                     gen_rtx_UDIV (compute_mode, op0, op1),
4197                                     quotient);
4198               }
4199             else                /* TRUNC_DIV, signed */
4200               {
4201                 unsigned HOST_WIDE_INT ml;
4202                 int lgup, post_shift;
4203                 rtx mlr;
4204                 HOST_WIDE_INT d = INTVAL (op1);
4205                 unsigned HOST_WIDE_INT abs_d;
4206
4207                 /* Since d might be INT_MIN, we have to cast to
4208                    unsigned HOST_WIDE_INT before negating to avoid
4209                    undefined signed overflow.  */
4210                 abs_d = (d >= 0
4211                          ? (unsigned HOST_WIDE_INT) d
4212                          : - (unsigned HOST_WIDE_INT) d);
4213
4214                 /* n rem d = n rem -d */
4215                 if (rem_flag && d < 0)
4216                   {
4217                     d = abs_d;
4218                     op1 = gen_int_mode (abs_d, compute_mode);
4219                   }
4220
4221                 if (d == 1)
4222                   quotient = op0;
4223                 else if (d == -1)
4224                   quotient = expand_unop (compute_mode, neg_optab, op0,
4225                                           tquotient, 0);
4226                 else if (HOST_BITS_PER_WIDE_INT >= size
4227                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4228                   {
4229                     /* This case is not handled correctly below.  */
4230                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4231                                                 compute_mode, 1, 1);
4232                     if (quotient == 0)
4233                       goto fail1;
4234                   }
4235                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4236                          && (rem_flag
4237                              ? smod_pow2_cheap (speed, compute_mode)
4238                              : sdiv_pow2_cheap (speed, compute_mode))
4239                          /* We assume that cheap metric is true if the
4240                             optab has an expander for this mode.  */
4241                          && ((optab_handler ((rem_flag ? smod_optab
4242                                               : sdiv_optab),
4243                                              compute_mode)
4244                               != CODE_FOR_nothing)
4245                              || (optab_handler (sdivmod_optab,
4246                                                 compute_mode)
4247                                  != CODE_FOR_nothing)))
4248                   ;
4249                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4250                   {
4251                     if (rem_flag)
4252                       {
4253                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4254                         if (remainder)
4255                           return gen_lowpart (mode, remainder);
4256                       }
4257
4258                     if (sdiv_pow2_cheap (speed, compute_mode)
4259                         && ((optab_handler (sdiv_optab, compute_mode)
4260                              != CODE_FOR_nothing)
4261                             || (optab_handler (sdivmod_optab, compute_mode)
4262                                 != CODE_FOR_nothing)))
4263                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4264                                                 compute_mode, op0,
4265                                                 gen_int_mode (abs_d,
4266                                                               compute_mode),
4267                                                 NULL_RTX, 0);
4268                     else
4269                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4270
4271                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4272                        negate the quotient.  */
4273                     if (d < 0)
4274                       {
4275                         insn = get_last_insn ();
4276                         if (insn != last
4277                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4278                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4279                           set_dst_reg_note (insn, REG_EQUAL,
4280                                             gen_rtx_DIV (compute_mode, op0,
4281                                                          gen_int_mode
4282                                                            (abs_d,
4283                                                             compute_mode)),
4284                                             quotient);
4285
4286                         quotient = expand_unop (compute_mode, neg_optab,
4287                                                 quotient, quotient, 0);
4288                       }
4289                   }
4290                 else if (size <= HOST_BITS_PER_WIDE_INT)
4291                   {
4292                     choose_multiplier (abs_d, size, size - 1,
4293                                        &ml, &post_shift, &lgup);
4294                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4295                       {
4296                         rtx t1, t2, t3;
4297
4298                         if (post_shift >= BITS_PER_WORD
4299                             || size - 1 >= BITS_PER_WORD)
4300                           goto fail1;
4301
4302                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4303                                       + shift_cost (speed, compute_mode, size - 1)
4304                                       + add_cost (speed, compute_mode));
4305                         t1 = expmed_mult_highpart
4306                           (compute_mode, op0, gen_int_mode (ml, compute_mode),
4307                            NULL_RTX, 0, max_cost - extra_cost);
4308                         if (t1 == 0)
4309                           goto fail1;
4310                         t2 = expand_shift
4311                           (RSHIFT_EXPR, compute_mode, t1,
4312                            post_shift, NULL_RTX, 0);
4313                         t3 = expand_shift
4314                           (RSHIFT_EXPR, compute_mode, op0,
4315                            size - 1, NULL_RTX, 0);
4316                         if (d < 0)
4317                           quotient
4318                             = force_operand (gen_rtx_MINUS (compute_mode,
4319                                                             t3, t2),
4320                                              tquotient);
4321                         else
4322                           quotient
4323                             = force_operand (gen_rtx_MINUS (compute_mode,
4324                                                             t2, t3),
4325                                              tquotient);
4326                       }
4327                     else
4328                       {
4329                         rtx t1, t2, t3, t4;
4330
4331                         if (post_shift >= BITS_PER_WORD
4332                             || size - 1 >= BITS_PER_WORD)
4333                           goto fail1;
4334
4335                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4336                         mlr = gen_int_mode (ml, compute_mode);
4337                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4338                                       + shift_cost (speed, compute_mode, size - 1)
4339                                       + 2 * add_cost (speed, compute_mode));
4340                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4341                                                    NULL_RTX, 0,
4342                                                    max_cost - extra_cost);
4343                         if (t1 == 0)
4344                           goto fail1;
4345                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4346                                                           t1, op0),
4347                                             NULL_RTX);
4348                         t3 = expand_shift
4349                           (RSHIFT_EXPR, compute_mode, t2,
4350                            post_shift, NULL_RTX, 0);
4351                         t4 = expand_shift
4352                           (RSHIFT_EXPR, compute_mode, op0,
4353                            size - 1, NULL_RTX, 0);
4354                         if (d < 0)
4355                           quotient
4356                             = force_operand (gen_rtx_MINUS (compute_mode,
4357                                                             t4, t3),
4358                                              tquotient);
4359                         else
4360                           quotient
4361                             = force_operand (gen_rtx_MINUS (compute_mode,
4362                                                             t3, t4),
4363                                              tquotient);
4364                       }
4365                   }
4366                 else            /* Too wide mode to use tricky code */
4367                   break;
4368
4369                 insn = get_last_insn ();
4370                 if (insn != last)
4371                   set_dst_reg_note (insn, REG_EQUAL,
4372                                     gen_rtx_DIV (compute_mode, op0, op1),
4373                                     quotient);
4374               }
4375             break;
4376           }
4377       fail1:
4378         delete_insns_since (last);
4379         break;
4380
4381       case FLOOR_DIV_EXPR:
4382       case FLOOR_MOD_EXPR:
4383       /* We will come here only for signed operations.  */
4384         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4385           {
4386             unsigned HOST_WIDE_INT mh, ml;
4387             int pre_shift, lgup, post_shift;
4388             HOST_WIDE_INT d = INTVAL (op1);
4389
4390             if (d > 0)
4391               {
4392                 /* We could just as easily deal with negative constants here,
4393                    but it does not seem worth the trouble for GCC 2.6.  */
4394                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4395                   {
4396                     pre_shift = floor_log2 (d);
4397                     if (rem_flag)
4398                       {
4399                         unsigned HOST_WIDE_INT mask
4400                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4401                         remainder = expand_binop
4402                           (compute_mode, and_optab, op0,
4403                            gen_int_mode (mask, compute_mode),
4404                            remainder, 0, OPTAB_LIB_WIDEN);
4405                         if (remainder)
4406                           return gen_lowpart (mode, remainder);
4407                       }
4408                     quotient = expand_shift
4409                       (RSHIFT_EXPR, compute_mode, op0,
4410                        pre_shift, tquotient, 0);
4411                   }
4412                 else
4413                   {
4414                     rtx t1, t2, t3, t4;
4415
4416                     mh = choose_multiplier (d, size, size - 1,
4417                                             &ml, &post_shift, &lgup);
4418                     gcc_assert (!mh);
4419
4420                     if (post_shift < BITS_PER_WORD
4421                         && size - 1 < BITS_PER_WORD)
4422                       {
4423                         t1 = expand_shift
4424                           (RSHIFT_EXPR, compute_mode, op0,
4425                            size - 1, NULL_RTX, 0);
4426                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4427                                            NULL_RTX, 0, OPTAB_WIDEN);
4428                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4429                                       + shift_cost (speed, compute_mode, size - 1)
4430                                       + 2 * add_cost (speed, compute_mode));
4431                         t3 = expmed_mult_highpart
4432                           (compute_mode, t2, gen_int_mode (ml, compute_mode),
4433                            NULL_RTX, 1, max_cost - extra_cost);
4434                         if (t3 != 0)
4435                           {
4436                             t4 = expand_shift
4437                               (RSHIFT_EXPR, compute_mode, t3,
4438                                post_shift, NULL_RTX, 1);
4439                             quotient = expand_binop (compute_mode, xor_optab,
4440                                                      t4, t1, tquotient, 0,
4441                                                      OPTAB_WIDEN);
4442                           }
4443                       }
4444                   }
4445               }
4446             else
4447               {
4448                 rtx nsign, t1, t2, t3, t4;
4449                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4450                                                   op0, constm1_rtx), NULL_RTX);
4451                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4452                                    0, OPTAB_WIDEN);
4453                 nsign = expand_shift
4454                   (RSHIFT_EXPR, compute_mode, t2,
4455                    size - 1, NULL_RTX, 0);
4456                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4457                                     NULL_RTX);
4458                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4459                                     NULL_RTX, 0);
4460                 if (t4)
4461                   {
4462                     rtx t5;
4463                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4464                                       NULL_RTX, 0);
4465                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4466                                                             t4, t5),
4467                                               tquotient);
4468                   }
4469               }
4470           }
4471
4472         if (quotient != 0)
4473           break;
4474         delete_insns_since (last);
4475
4476         /* Try using an instruction that produces both the quotient and
4477            remainder, using truncation.  We can easily compensate the quotient
4478            or remainder to get floor rounding, once we have the remainder.
4479            Notice that we compute also the final remainder value here,
4480            and return the result right away.  */
4481         if (target == 0 || GET_MODE (target) != compute_mode)
4482           target = gen_reg_rtx (compute_mode);
4483
4484         if (rem_flag)
4485           {
4486             remainder
4487               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4488             quotient = gen_reg_rtx (compute_mode);
4489           }
4490         else
4491           {
4492             quotient
4493               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4494             remainder = gen_reg_rtx (compute_mode);
4495           }
4496
4497         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4498                                  quotient, remainder, 0))
4499           {
4500             /* This could be computed with a branch-less sequence.
4501                Save that for later.  */
4502             rtx tem;
4503             rtx_code_label *label = gen_label_rtx ();
4504             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4505             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4506                                 NULL_RTX, 0, OPTAB_WIDEN);
4507             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4508             expand_dec (quotient, const1_rtx);
4509             expand_inc (remainder, op1);
4510             emit_label (label);
4511             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4512           }
4513
4514         /* No luck with division elimination or divmod.  Have to do it
4515            by conditionally adjusting op0 *and* the result.  */
4516         {
4517           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4518           rtx adjusted_op0;
4519           rtx tem;
4520
4521           quotient = gen_reg_rtx (compute_mode);
4522           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4523           label1 = gen_label_rtx ();
4524           label2 = gen_label_rtx ();
4525           label3 = gen_label_rtx ();
4526           label4 = gen_label_rtx ();
4527           label5 = gen_label_rtx ();
4528           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4529           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4530           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4531                               quotient, 0, OPTAB_LIB_WIDEN);
4532           if (tem != quotient)
4533             emit_move_insn (quotient, tem);
4534           emit_jump_insn (targetm.gen_jump (label5));
4535           emit_barrier ();
4536           emit_label (label1);
4537           expand_inc (adjusted_op0, const1_rtx);
4538           emit_jump_insn (targetm.gen_jump (label4));
4539           emit_barrier ();
4540           emit_label (label2);
4541           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4542           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4543                               quotient, 0, OPTAB_LIB_WIDEN);
4544           if (tem != quotient)
4545             emit_move_insn (quotient, tem);
4546           emit_jump_insn (targetm.gen_jump (label5));
4547           emit_barrier ();
4548           emit_label (label3);
4549           expand_dec (adjusted_op0, const1_rtx);
4550           emit_label (label4);
4551           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4552                               quotient, 0, OPTAB_LIB_WIDEN);
4553           if (tem != quotient)
4554             emit_move_insn (quotient, tem);
4555           expand_dec (quotient, const1_rtx);
4556           emit_label (label5);
4557         }
4558         break;
4559
4560       case CEIL_DIV_EXPR:
4561       case CEIL_MOD_EXPR:
4562         if (unsignedp)
4563           {
4564             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4565               {
4566                 rtx t1, t2, t3;
4567                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4568                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4569                                    floor_log2 (d), tquotient, 1);
4570                 t2 = expand_binop (compute_mode, and_optab, op0,
4571                                    gen_int_mode (d - 1, compute_mode),
4572                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4573                 t3 = gen_reg_rtx (compute_mode);
4574                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4575                                       compute_mode, 1, 1);
4576                 if (t3 == 0)
4577                   {
4578                     rtx_code_label *lab;
4579                     lab = gen_label_rtx ();
4580                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4581                     expand_inc (t1, const1_rtx);
4582                     emit_label (lab);
4583                     quotient = t1;
4584                   }
4585                 else
4586                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4587                                                           t1, t3),
4588                                             tquotient);
4589                 break;
4590               }
4591
4592             /* Try using an instruction that produces both the quotient and
4593                remainder, using truncation.  We can easily compensate the
4594                quotient or remainder to get ceiling rounding, once we have the
4595                remainder.  Notice that we compute also the final remainder
4596                value here, and return the result right away.  */
4597             if (target == 0 || GET_MODE (target) != compute_mode)
4598               target = gen_reg_rtx (compute_mode);
4599
4600             if (rem_flag)
4601               {
4602                 remainder = (REG_P (target)
4603                              ? target : gen_reg_rtx (compute_mode));
4604                 quotient = gen_reg_rtx (compute_mode);
4605               }
4606             else
4607               {
4608                 quotient = (REG_P (target)
4609                             ? target : gen_reg_rtx (compute_mode));
4610                 remainder = gen_reg_rtx (compute_mode);
4611               }
4612
4613             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4614                                      remainder, 1))
4615               {
4616                 /* This could be computed with a branch-less sequence.
4617                    Save that for later.  */
4618                 rtx_code_label *label = gen_label_rtx ();
4619                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4620                                  compute_mode, label);
4621                 expand_inc (quotient, const1_rtx);
4622                 expand_dec (remainder, op1);
4623                 emit_label (label);
4624                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4625               }
4626
4627             /* No luck with division elimination or divmod.  Have to do it
4628                by conditionally adjusting op0 *and* the result.  */
4629             {
4630               rtx_code_label *label1, *label2;
4631               rtx adjusted_op0, tem;
4632
4633               quotient = gen_reg_rtx (compute_mode);
4634               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4635               label1 = gen_label_rtx ();
4636               label2 = gen_label_rtx ();
4637               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4638                                compute_mode, label1);
4639               emit_move_insn  (quotient, const0_rtx);
4640               emit_jump_insn (targetm.gen_jump (label2));
4641               emit_barrier ();
4642               emit_label (label1);
4643               expand_dec (adjusted_op0, const1_rtx);
4644               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4645                                   quotient, 1, OPTAB_LIB_WIDEN);
4646               if (tem != quotient)
4647                 emit_move_insn (quotient, tem);
4648               expand_inc (quotient, const1_rtx);
4649               emit_label (label2);
4650             }
4651           }
4652         else /* signed */
4653           {
4654             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4655                 && INTVAL (op1) >= 0)
4656               {
4657                 /* This is extremely similar to the code for the unsigned case
4658                    above.  For 2.7 we should merge these variants, but for
4659                    2.6.1 I don't want to touch the code for unsigned since that
4660                    get used in C.  The signed case will only be used by other
4661                    languages (Ada).  */
4662
4663                 rtx t1, t2, t3;
4664                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4665                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4666                                    floor_log2 (d), tquotient, 0);
4667                 t2 = expand_binop (compute_mode, and_optab, op0,
4668                                    gen_int_mode (d - 1, compute_mode),
4669                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4670                 t3 = gen_reg_rtx (compute_mode);
4671                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4672                                       compute_mode, 1, 1);
4673                 if (t3 == 0)
4674                   {
4675                     rtx_code_label *lab;
4676                     lab = gen_label_rtx ();
4677                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4678                     expand_inc (t1, const1_rtx);
4679                     emit_label (lab);
4680                     quotient = t1;
4681                   }
4682                 else
4683                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4684                                                           t1, t3),
4685                                             tquotient);
4686                 break;
4687               }
4688
4689             /* Try using an instruction that produces both the quotient and
4690                remainder, using truncation.  We can easily compensate the
4691                quotient or remainder to get ceiling rounding, once we have the
4692                remainder.  Notice that we compute also the final remainder
4693                value here, and return the result right away.  */
4694             if (target == 0 || GET_MODE (target) != compute_mode)
4695               target = gen_reg_rtx (compute_mode);
4696             if (rem_flag)
4697               {
4698                 remainder= (REG_P (target)
4699                             ? target : gen_reg_rtx (compute_mode));
4700                 quotient = gen_reg_rtx (compute_mode);
4701               }
4702             else
4703               {
4704                 quotient = (REG_P (target)
4705                             ? target : gen_reg_rtx (compute_mode));
4706                 remainder = gen_reg_rtx (compute_mode);
4707               }
4708
4709             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4710                                      remainder, 0))
4711               {
4712                 /* This could be computed with a branch-less sequence.
4713                    Save that for later.  */
4714                 rtx tem;
4715                 rtx_code_label *label = gen_label_rtx ();
4716                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4717                                  compute_mode, label);
4718                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4719                                     NULL_RTX, 0, OPTAB_WIDEN);
4720                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4721                 expand_inc (quotient, const1_rtx);
4722                 expand_dec (remainder, op1);
4723                 emit_label (label);
4724                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4725               }
4726
4727             /* No luck with division elimination or divmod.  Have to do it
4728                by conditionally adjusting op0 *and* the result.  */
4729             {
4730               rtx_code_label *label1, *label2, *label3, *label4, *label5;
4731               rtx adjusted_op0;
4732               rtx tem;
4733
4734               quotient = gen_reg_rtx (compute_mode);
4735               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4736               label1 = gen_label_rtx ();
4737               label2 = gen_label_rtx ();
4738               label3 = gen_label_rtx ();
4739               label4 = gen_label_rtx ();
4740               label5 = gen_label_rtx ();
4741               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4742               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4743                                compute_mode, label1);
4744               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4745                                   quotient, 0, OPTAB_LIB_WIDEN);
4746               if (tem != quotient)
4747                 emit_move_insn (quotient, tem);
4748               emit_jump_insn (targetm.gen_jump (label5));
4749               emit_barrier ();
4750               emit_label (label1);
4751               expand_dec (adjusted_op0, const1_rtx);
4752               emit_jump_insn (targetm.gen_jump (label4));
4753               emit_barrier ();
4754               emit_label (label2);
4755               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4756                                compute_mode, label3);
4757               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4758                                   quotient, 0, OPTAB_LIB_WIDEN);
4759               if (tem != quotient)
4760                 emit_move_insn (quotient, tem);
4761               emit_jump_insn (targetm.gen_jump (label5));
4762               emit_barrier ();
4763               emit_label (label3);
4764               expand_inc (adjusted_op0, const1_rtx);
4765               emit_label (label4);
4766               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4767                                   quotient, 0, OPTAB_LIB_WIDEN);
4768               if (tem != quotient)
4769                 emit_move_insn (quotient, tem);
4770               expand_inc (quotient, const1_rtx);
4771               emit_label (label5);
4772             }
4773           }
4774         break;
4775
4776       case EXACT_DIV_EXPR:
4777         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4778           {
4779             HOST_WIDE_INT d = INTVAL (op1);
4780             unsigned HOST_WIDE_INT ml;
4781             int pre_shift;
4782             rtx t1;
4783
4784             pre_shift = floor_log2 (d & -d);
4785             ml = invert_mod2n (d >> pre_shift, size);
4786             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4787                                pre_shift, NULL_RTX, unsignedp);
4788             quotient = expand_mult (compute_mode, t1,
4789                                     gen_int_mode (ml, compute_mode),
4790                                     NULL_RTX, 1);
4791
4792             insn = get_last_insn ();
4793             set_dst_reg_note (insn, REG_EQUAL,
4794                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4795                                               compute_mode, op0, op1),
4796                               quotient);
4797           }
4798         break;
4799
4800       case ROUND_DIV_EXPR:
4801       case ROUND_MOD_EXPR:
4802         if (unsignedp)
4803           {
4804             rtx tem;
4805             rtx_code_label *label;
4806             label = gen_label_rtx ();
4807             quotient = gen_reg_rtx (compute_mode);
4808             remainder = gen_reg_rtx (compute_mode);
4809             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4810               {
4811                 rtx tem;
4812                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4813                                          quotient, 1, OPTAB_LIB_WIDEN);
4814                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4815                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4816                                           remainder, 1, OPTAB_LIB_WIDEN);
4817               }
4818             tem = plus_constant (compute_mode, op1, -1);
4819             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4820             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4821             expand_inc (quotient, const1_rtx);
4822             expand_dec (remainder, op1);
4823             emit_label (label);
4824           }
4825         else
4826           {
4827             rtx abs_rem, abs_op1, tem, mask;
4828             rtx_code_label *label;
4829             label = gen_label_rtx ();
4830             quotient = gen_reg_rtx (compute_mode);
4831             remainder = gen_reg_rtx (compute_mode);
4832             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4833               {
4834                 rtx tem;
4835                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4836                                          quotient, 0, OPTAB_LIB_WIDEN);
4837                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4838                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4839                                           remainder, 0, OPTAB_LIB_WIDEN);
4840               }
4841             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4842             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4843             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4844                                 1, NULL_RTX, 1);
4845             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4846             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4847                                 NULL_RTX, 0, OPTAB_WIDEN);
4848             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4849                                  size - 1, NULL_RTX, 0);
4850             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4851                                 NULL_RTX, 0, OPTAB_WIDEN);
4852             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4853                                 NULL_RTX, 0, OPTAB_WIDEN);
4854             expand_inc (quotient, tem);
4855             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4856                                 NULL_RTX, 0, OPTAB_WIDEN);
4857             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4858                                 NULL_RTX, 0, OPTAB_WIDEN);
4859             expand_dec (remainder, tem);
4860             emit_label (label);
4861           }
4862         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4863
4864       default:
4865         gcc_unreachable ();
4866       }
4867
4868   if (quotient == 0)
4869     {
4870       if (target && GET_MODE (target) != compute_mode)
4871         target = 0;
4872
4873       if (rem_flag)
4874         {
4875           /* Try to produce the remainder without producing the quotient.
4876              If we seem to have a divmod pattern that does not require widening,
4877              don't try widening here.  We should really have a WIDEN argument
4878              to expand_twoval_binop, since what we'd really like to do here is
4879              1) try a mod insn in compute_mode
4880              2) try a divmod insn in compute_mode
4881              3) try a div insn in compute_mode and multiply-subtract to get
4882                 remainder
4883              4) try the same things with widening allowed.  */
4884           remainder
4885             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4886                                  op0, op1, target,
4887                                  unsignedp,
4888                                  ((optab_handler (optab2, compute_mode)
4889                                    != CODE_FOR_nothing)
4890                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4891           if (remainder == 0)
4892             {
4893               /* No luck there.  Can we do remainder and divide at once
4894                  without a library call?  */
4895               remainder = gen_reg_rtx (compute_mode);
4896               if (! expand_twoval_binop ((unsignedp
4897                                           ? udivmod_optab
4898                                           : sdivmod_optab),
4899                                          op0, op1,
4900                                          NULL_RTX, remainder, unsignedp))
4901                 remainder = 0;
4902             }
4903
4904           if (remainder)
4905             return gen_lowpart (mode, remainder);
4906         }
4907
4908       /* Produce the quotient.  Try a quotient insn, but not a library call.
4909          If we have a divmod in this mode, use it in preference to widening
4910          the div (for this test we assume it will not fail). Note that optab2
4911          is set to the one of the two optabs that the call below will use.  */
4912       quotient
4913         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4914                              op0, op1, rem_flag ? NULL_RTX : target,
4915                              unsignedp,
4916                              ((optab_handler (optab2, compute_mode)
4917                                != CODE_FOR_nothing)
4918                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4919
4920       if (quotient == 0)
4921         {
4922           /* No luck there.  Try a quotient-and-remainder insn,
4923              keeping the quotient alone.  */
4924           quotient = gen_reg_rtx (compute_mode);
4925           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4926                                      op0, op1,
4927                                      quotient, NULL_RTX, unsignedp))
4928             {
4929               quotient = 0;
4930               if (! rem_flag)
4931                 /* Still no luck.  If we are not computing the remainder,
4932                    use a library call for the quotient.  */
4933                 quotient = sign_expand_binop (compute_mode,
4934                                               udiv_optab, sdiv_optab,
4935                                               op0, op1, target,
4936                                               unsignedp, OPTAB_LIB_WIDEN);
4937             }
4938         }
4939     }
4940
4941   if (rem_flag)
4942     {
4943       if (target && GET_MODE (target) != compute_mode)
4944         target = 0;
4945
4946       if (quotient == 0)
4947         {
4948           /* No divide instruction either.  Use library for remainder.  */
4949           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4950                                          op0, op1, target,
4951                                          unsignedp, OPTAB_LIB_WIDEN);
4952           /* No remainder function.  Try a quotient-and-remainder
4953              function, keeping the remainder.  */
4954           if (!remainder)
4955             {
4956               remainder = gen_reg_rtx (compute_mode);
4957               if (!expand_twoval_binop_libfunc
4958                   (unsignedp ? udivmod_optab : sdivmod_optab,
4959                    op0, op1,
4960                    NULL_RTX, remainder,
4961                    unsignedp ? UMOD : MOD))
4962                 remainder = NULL_RTX;
4963             }
4964         }
4965       else
4966         {
4967           /* We divided.  Now finish doing X - Y * (X / Y).  */
4968           remainder = expand_mult (compute_mode, quotient, op1,
4969                                    NULL_RTX, unsignedp);
4970           remainder = expand_binop (compute_mode, sub_optab, op0,
4971                                     remainder, target, unsignedp,
4972                                     OPTAB_LIB_WIDEN);
4973         }
4974     }
4975
4976   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4977 }
4978 \f
4979 /* Return a tree node with data type TYPE, describing the value of X.
4980    Usually this is a VAR_DECL, if there is no obvious better choice.
4981    X may be an expression, however we only support those expressions
4982    generated by loop.c.

        The rtx code of X selects the translation.  Constants (CONST_INT,
        CONST_WIDE_INT, CONST_DOUBLE, CONST_VECTOR) become constant trees.
        PLUS, MINUS, NEG, MULT, the shifts, DIV, UDIV and the two extensions
        are rebuilt by calling make_tree recursively on their operands.
        CONST is looked through, and a SYMBOL_REF that has a decl becomes
        the address of that decl.  Every other rtx is wrapped in a VAR_DECL
        built with a null name, whose RTL is set to X.  */
4983
4984 tree
4985 make_tree (tree type, rtx x)
4986 {
4987   tree t;
4988
4989   switch (GET_CODE (x))
4990     {
4991     case CONST_INT:
4992     case CONST_WIDE_INT:
                /* X carries no usable mode of its own here, so pair it with
                   TYPE's mode before converting it to a tree constant.  */
4993       t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
4994       return t;
4995
4996     case CONST_DOUBLE:
                /* A VOIDmode CONST_DOUBLE on a target without wide-int support
                   is read as an integer made of two HOST_WIDE_INTs starting at
                   CONST_DOUBLE_LOW; any other CONST_DOUBLE is read as a real
                   value.  */
4997       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
4998       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
4999         t = wide_int_to_tree (type,
5000                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5001                                                     HOST_BITS_PER_WIDE_INT * 2));
5002       else
5003         t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5004
5005       return t;
5006
5007     case CONST_VECTOR:
5008       {
5009         int units = CONST_VECTOR_NUNITS (x);
5010         tree itype = TREE_TYPE (type);
5011         tree *elts;
5012         int i;
5013
5014         /* Build a tree with vector elements.  */
                  /* Each element is converted with the element type of TYPE;
                     the temporary array comes from XALLOCAVEC.  */
5015         elts = XALLOCAVEC (tree, units);
5016         for (i = units - 1; i >= 0; --i)
5017           {
5018             rtx elt = CONST_VECTOR_ELT (x, i);
5019             elts[i] = make_tree (itype, elt);
5020           }
5021
5022         return build_vector (type, elts);
5023       }
5024
              /* The arithmetic codes below map one-to-one onto tree codes, with
                 both the result and the operands built in TYPE.  */
5025     case PLUS:
5026       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5027                           make_tree (type, XEXP (x, 1)));
5028
5029     case MINUS:
5030       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5031                           make_tree (type, XEXP (x, 1)));
5032
5033     case NEG:
5034       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5035
5036     case MULT:
5037       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5038                           make_tree (type, XEXP (x, 1)));
5039
5040     case ASHIFT:
5041       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5042                           make_tree (type, XEXP (x, 1)));
5043
5044     case LSHIFTRT:
                /* Both right shifts use RSHIFT_EXPR; the signedness of the type
                   the shift is built in distinguishes them.  Here the shifted
                   operand is built in the unsigned variant of TYPE (the count
                   is still built in TYPE) and the result is converted back.  */
5045       t = unsigned_type_for (type);
5046       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5047                                          make_tree (t, XEXP (x, 0)),
5048                                          make_tree (type, XEXP (x, 1))));
5049
5050     case ASHIFTRT:
                /* Same as LSHIFTRT, but in the signed variant of TYPE.  */
5051       t = signed_type_for (type);
5052       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5053                                          make_tree (t, XEXP (x, 0)),
5054                                          make_tree (type, XEXP (x, 1))));
5055
5056     case DIV:
                /* Divide in the signed variant of TYPE, except that a REAL_TYPE
                   is used as it is.  */
5057       if (TREE_CODE (type) != REAL_TYPE)
5058         t = signed_type_for (type);
5059       else
5060         t = type;
5061
5062       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5063                                          make_tree (t, XEXP (x, 0)),
5064                                          make_tree (t, XEXP (x, 1))));
5065     case UDIV:
                /* Divide in the unsigned variant of TYPE, then convert back.  */
5066       t = unsigned_type_for (type);
5067       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5068                                          make_tree (t, XEXP (x, 0)),
5069                                          make_tree (t, XEXP (x, 1))));
5070
5071     case SIGN_EXTEND:
5072     case ZERO_EXTEND:
                /* Ask the front end for a type with the mode of the operand
                   being extended (the second argument is nonzero only for
                   ZERO_EXTEND), build the operand in that type, and let
                   fold_convert perform the extension to TYPE.  */
5073       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5074                                           GET_CODE (x) == ZERO_EXTEND);
5075       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5076
5077     case CONST:
                /* CONST is only a wrapper; translate what it contains.  */
5078       return make_tree (type, XEXP (x, 0));
5079
5080     case SYMBOL_REF:
                /* A symbol with a decl becomes the address of that decl,
                   converted to TYPE.  Without one, it is handled like any
                   other rtx by the default case.  */
5081       t = SYMBOL_REF_DECL (x);
5082       if (t)
5083         return fold_convert (type, build_fold_addr_expr (t));
5084       /* else fall through.  */
5085
5086     default:
                /* No tree equivalent: make a VAR_DECL with no name, located at
                   RTL_LOCATION (x), and attach X to it as its RTL.  */
5087       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5088
5089       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5090          address mode to pointer mode.  */
5091       if (POINTER_TYPE_P (type))
5092         x = convert_memory_address_addr_space
5093               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5094
5095       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5096          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5097       t->decl_with_rtl.rtl = x;
5098
5099       return t;
5100     }
5101 }
5102 \f
5103 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5104    and returning TARGET.
5105
5106    If TARGET is 0, a pseudo-register or constant is returned.

        MODE is the mode handed to both the simplifier and expand_binop.
        When TARGET is nonzero the return value is always TARGET itself;
        a result produced elsewhere is moved into it first.  */
5107
5108 rtx
5109 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5110 {
5111   rtx tem = 0;
5112
            /* When both operands are VOIDmode (presumably constants), first
               try to simplify the AND directly.  */
5113   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5114     tem = simplify_binary_operation (AND, mode, op0, op1);
            /* Simplification was not attempted or gave nothing: expand the
               AND through and_optab, suggesting TARGET as the destination.  */
5115   if (tem == 0)
5116     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5117
            /* With no TARGET requested, return the result wherever it is.
               Otherwise copy it into TARGET unless it is already there.  */
5118   if (target == 0)
5119     target = tem;
5120   else if (tem != target)
5121     emit_move_insn (target, tem);
5122   return target;
5123 }
5124
5125 /* Helper function for emit_store_flag.

        Try to emit a store-flag instruction with pattern ICODE for the
        comparison CODE of X and Y.  Return an rtx holding the result, or
        NULL_RTX if the operands cannot be prepared or the pattern does not
        expand; in that case every insn emitted here is deleted again.

        X and Y are prepared as operands 2 and 3 of ICODE using MODE,
        COMPARE_MODE and UNSIGNEDP.  TARGET, if nonzero, is where the caller
        would like the result, and TARGET_MODE is the mode wanted for it;
        VOIDmode means the pattern's own result mode, as reported by
        targetm.cstore_mode.  NORMALIZEP says how the raw flag value must be
        adjusted relative to STORE_FLAG_VALUE: 0 or STORE_FLAG_VALUE itself
        requires no adjustment, and the remaining cases are handled by the
        normalization code at the end of the function.  */
5126 rtx
5127 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5128              machine_mode mode, machine_mode compare_mode,
5129              int unsignedp, rtx x, rtx y, int normalizep,
5130              machine_mode target_mode)
5131 {
5132   struct expand_operand ops[4];
5133   rtx op0, comparison, subtarget;
5134   rtx_insn *last;
5135   machine_mode result_mode = targetm.cstore_mode (icode);
5136
            /* Remember the current end of the insn stream so that anything
               emitted below can be discarded on failure.  */
5137   last = get_last_insn ();
5138   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5139   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5140   if (!x || !y)
5141     {
5142       delete_insns_since (last);
5143       return NULL_RTX;
5144     }
5145
5146   if (target_mode == VOIDmode)
5147     target_mode = result_mode;
5148   if (!target)
5149     target = gen_reg_rtx (target_mode);
5150
5151   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5152
            /* Operand 0 is the flag output, operand 1 the comparison rtx and
               operands 2 and 3 its two inputs.  TARGET is suggested as the
               output only when not optimizing.  */
5153   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5154   create_fixed_operand (&ops[1], comparison);
5155   create_fixed_operand (&ops[2], x);
5156   create_fixed_operand (&ops[3], y);
5157   if (!maybe_expand_insn (icode, 4, ops))
5158     {
5159       delete_insns_since (last);
5160       return NULL_RTX;
5161     }
            /* The expander may have chosen its own output location.  */
5162   subtarget = ops[0].value;
5163
5164   /* If we are converting to a wider mode, first convert to
5165      TARGET_MODE, then normalize.  This produces better combining
5166      opportunities on machines that have a SIGN_EXTRACT when we are
5167      testing a single bit.  This mostly benefits the 68k.
5168
5169      If STORE_FLAG_VALUE does not have the sign bit set when
5170      interpreted in MODE, we can do this conversion as unsigned, which
5171      is usually more efficient.  */
5172   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5173     {
5174       convert_move (target, subtarget,
5175                     val_signbit_known_clear_p (result_mode,
5176                                                STORE_FLAG_VALUE));
5177       op0 = target;
                /* From here on the normalization works in the wider mode.  */
5178       result_mode = target_mode;
5179     }
5180   else
5181     op0 = subtarget;
5182
5183   /* If we want to keep subexpressions around, don't reuse our last
5184      target.  */
5185   if (optimize)
5186     subtarget = 0;
5187
5188   /* Now normalize to the proper value in MODE.  Sometimes we don't
5189      have to do anything.  */
5190   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5191     ;
5192   /* STORE_FLAG_VALUE might be the most negative number, so write
5193      the comparison this way to avoid a compiler-time warning.  */
5194   else if (- normalizep == STORE_FLAG_VALUE)
5195     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5196
5197   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5198      it hard to use a value of just the sign bit due to ANSI integer
5199      constant typing rules.  */
            /* The flag lives in the sign bit: shift it right across the whole
               mode, as an unsigned shift when NORMALIZEP is 1 and as a signed
               shift otherwise.  */
5200   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5201     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5202                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5203                         normalizep == 1);
5204   else
5205     {
                /* Otherwise the low bit of STORE_FLAG_VALUE must be set: mask
                   it out, and negate when -1 is wanted for true.  */
5206       gcc_assert (STORE_FLAG_VALUE & 1);
5207
5208       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5209       if (normalizep == -1)
5210         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5211     }
5212
5213   /* If we were converting to a smaller mode, do the conversion now.  */
5214   if (target_mode != result_mode)
5215     {
5216       convert_move (target, op0, 0);
5217       return target;
5218     }
5219   else
5220     return op0;
5221 }
5222
5223
5224 /* A subroutine of emit_store_flag only including "tricks" that do not
5225    need a recursive call.  These are kept separate to avoid infinite
5226    loops.  */
5227
5228 static rtx
5229 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5230                    machine_mode mode, int unsignedp, int normalizep,
5231                    machine_mode target_mode)
5232 {
5233   rtx subtarget;
5234   enum insn_code icode;
5235   machine_mode compare_mode;
5236   enum mode_class mclass;
5237   enum rtx_code scode;
5238
5239   if (unsignedp)
5240     code = unsigned_condition (code);
5241   scode = swap_condition (code);
5242
5243   /* If one operand is constant, make it the second one.  Only do this
5244      if the other operand is not constant as well.  */
5245
5246   if (swap_commutative_operands_p (op0, op1))
5247     {
5248       std::swap (op0, op1);
5249       code = swap_condition (code);
5250     }
5251
5252   if (mode == VOIDmode)
5253     mode = GET_MODE (op0);
5254
5255   /* For some comparisons with 1 and -1, we can convert this to
5256      comparisons with zero.  This will often produce more opportunities for
5257      store-flag insns.  */
5258
5259   switch (code)
5260     {
5261     case LT:
5262       if (op1 == const1_rtx)
5263         op1 = const0_rtx, code = LE;
5264       break;
5265     case LE:
5266       if (op1 == constm1_rtx)
5267         op1 = const0_rtx, code = LT;
5268       break;
5269     case GE:
5270       if (op1 == const1_rtx)
5271         op1 = const0_rtx, code = GT;
5272       break;
5273     case GT:
5274       if (op1 == constm1_rtx)
5275         op1 = const0_rtx, code = GE;
5276       break;
5277     case GEU:
5278       if (op1 == const1_rtx)
5279         op1 = const0_rtx, code = NE;
5280       break;
5281     case LTU:
5282       if (op1 == const1_rtx)
5283         op1 = const0_rtx, code = EQ;
5284       break;
5285     default:
5286       break;
5287     }
5288
5289   /* If we are comparing a double-word integer with zero or -1, we can
5290      convert the comparison into one involving a single word.  */
5291   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5292       && GET_MODE_CLASS (mode) == MODE_INT
5293       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5294     {
5295       rtx tem;
5296       if ((code == EQ || code == NE)
5297           && (op1 == const0_rtx || op1 == constm1_rtx))
5298         {
5299           rtx op00, op01;
5300
5301           /* Do a logical OR or AND of the two words and compare the
5302              result.  */
5303           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5304           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5305           tem = expand_binop (word_mode,
5306                               op1 == const0_rtx ? ior_optab : and_optab,
5307                               op00, op01, NULL_RTX, unsignedp,
5308                               OPTAB_DIRECT);
5309
5310           if (tem != 0)
5311             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5312                                    unsignedp, normalizep);
5313         }
5314       else if ((code == LT || code == GE) && op1 == const0_rtx)
5315         {
5316           rtx op0h;
5317
5318           /* If testing the sign bit, can just test on high word.  */
5319           op0h = simplify_gen_subreg (word_mode, op0, mode,
5320                                       subreg_highpart_offset (word_mode,
5321                                                               mode));
5322           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5323                                  unsignedp, normalizep);
5324         }
5325       else
5326         tem = NULL_RTX;
5327
5328       if (tem)
5329         {
5330           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5331             return tem;
5332           if (!target)
5333             target = gen_reg_rtx (target_mode);
5334
5335           convert_move (target, tem,
5336                         !val_signbit_known_set_p (word_mode,
5337                                                   (normalizep ? normalizep
5338                                                    : STORE_FLAG_VALUE)));
5339           return target;
5340         }
5341     }
5342
5343   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5344      complement of A (for GE) and shifting the sign bit to the low bit.  */
5345   if (op1 == const0_rtx && (code == LT || code == GE)
5346       && GET_MODE_CLASS (mode) == MODE_INT
5347       && (normalizep || STORE_FLAG_VALUE == 1
5348           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5349     {
5350       subtarget = target;
5351
5352       if (!target)
5353         target_mode = mode;
5354
5355       /* If the result is to be wider than OP0, it is best to convert it
5356          first.  If it is to be narrower, it is *incorrect* to convert it
5357          first.  */
5358       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5359         {
5360           op0 = convert_modes (target_mode, mode, op0, 0);
5361           mode = target_mode;
5362         }
5363
5364       if (target_mode != mode)
5365         subtarget = 0;
5366
5367       if (code == GE)
5368         op0 = expand_unop (mode, one_cmpl_optab, op0,
5369                            ((STORE_FLAG_VALUE == 1 || normalizep)
5370                             ? 0 : subtarget), 0);
5371
5372       if (STORE_FLAG_VALUE == 1 || normalizep)
5373         /* If we are supposed to produce a 0/1 value, we want to do
5374            a logical shift from the sign bit to the low-order bit; for
5375            a -1/0 value, we do an arithmetic shift.  */
5376         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5377                             GET_MODE_BITSIZE (mode) - 1,
5378                             subtarget, normalizep != -1);
5379
5380       if (mode != target_mode)
5381         op0 = convert_modes (target_mode, mode, op0, 0);
5382
5383       return op0;
5384     }
5385
5386   mclass = GET_MODE_CLASS (mode);
5387   for (compare_mode = mode; compare_mode != VOIDmode;
5388        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5389     {
5390      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5391      icode = optab_handler (cstore_optab, optab_mode);
5392      if (icode != CODE_FOR_nothing)
5393         {
5394           do_pending_stack_adjust ();
5395           rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5396                                  unsignedp, op0, op1, normalizep, target_mode);
5397           if (tem)
5398             return tem;
5399
5400           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5401             {
5402               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5403                                  unsignedp, op1, op0, normalizep, target_mode);
5404               if (tem)
5405                 return tem;
5406             }
5407           break;
5408         }
5409     }
5410
5411   return 0;
5412 }
5413
5414 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5415    and storing in TARGET.  Normally return TARGET.
5416    Return 0 if that cannot be done.
5417
5418    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5419    it is VOIDmode, they cannot both be CONST_INT.
5420
5421    UNSIGNEDP is for the case where we have to widen the operands
5422    to perform the operation.  It says to use zero-extension.
5423
5424    NORMALIZEP is 1 if we should convert the result to be either zero
5425    or one.  Normalize is -1 if we should convert the result to be
5426    either zero or -1.  If NORMALIZEP is zero, the result will be left
5427    "raw" out of the scc insn.  */
5428
5429 rtx
5430 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5431                  machine_mode mode, int unsignedp, int normalizep)
5432 {
5433   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5434   enum rtx_code rcode;
5435   rtx subtarget;
5436   rtx tem, trueval;
5437   rtx_insn *last;
5438
5439   /* If we compare constants, we shouldn't use a store-flag operation,
5440      but a constant load.  We can get there via the vanilla route that
5441      usually generates a compare-branch sequence, but will in this case
5442      fold the comparison to a constant, and thus elide the branch.  */
5443   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5444     return NULL_RTX;
5445
5446   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5447                            target_mode);
5448   if (tem)
5449     return tem;
5450
5451   /* If we reached here, we can't do this with a scc insn, however there
5452      are some comparisons that can be done in other ways.  Don't do any
5453      of these cases if branches are very cheap.  */
5454   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5455     return 0;
5456
5457   /* See what we need to return.  We can only return a 1, -1, or the
5458      sign bit.  */
5459
5460   if (normalizep == 0)
5461     {
5462       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5463         normalizep = STORE_FLAG_VALUE;
5464
5465       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5466         ;
5467       else
5468         return 0;
5469     }
5470
5471   last = get_last_insn ();
5472
5473   /* If optimizing, use different pseudo registers for each insn, instead
5474      of reusing the same pseudo.  This leads to better CSE, but slows
5475      down the compiler, since there are more pseudos */
5476   subtarget = (!optimize
5477                && (target_mode == mode)) ? target : NULL_RTX;
5478   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5479
5480   /* For floating-point comparisons, try the reverse comparison or try
5481      changing the "orderedness" of the comparison.  */
5482   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5483     {
5484       enum rtx_code first_code;
5485       bool and_them;
5486
5487       rcode = reverse_condition_maybe_unordered (code);
5488       if (can_compare_p (rcode, mode, ccp_store_flag)
5489           && (code == ORDERED || code == UNORDERED
5490               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5491               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5492         {
5493           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5494                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5495
5496           /* For the reverse comparison, use either an addition or a XOR.  */
5497           if (want_add
5498               && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5499                            optimize_insn_for_speed_p ()) == 0)
5500             {
5501               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5502                                        STORE_FLAG_VALUE, target_mode);
5503               if (tem)
5504                 return expand_binop (target_mode, add_optab, tem,
5505                                      gen_int_mode (normalizep, target_mode),
5506                                      target, 0, OPTAB_WIDEN);
5507             }
5508           else if (!want_add
5509                    && rtx_cost (trueval, mode, XOR, 1,
5510                                 optimize_insn_for_speed_p ()) == 0)
5511             {
5512               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5513                                        normalizep, target_mode);
5514               if (tem)
5515                 return expand_binop (target_mode, xor_optab, tem, trueval,
5516                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5517             }
5518         }
5519
5520       delete_insns_since (last);
5521
5522       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5523       if (code == ORDERED || code == UNORDERED)
5524         return 0;
5525
5526       and_them = split_comparison (code, mode, &first_code, &code);
5527
5528       /* If there are no NaNs, the first comparison should always fall through.
5529          Effectively change the comparison to the other one.  */
5530       if (!HONOR_NANS (mode))
5531         {
5532           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5533           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5534                                     target_mode);
5535         }
5536
5537       if (!HAVE_conditional_move)
5538         return 0;
5539
5540       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5541          conditional move.  */
5542       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5543                                normalizep, target_mode);
5544       if (tem == 0)
5545         return 0;
5546
5547       if (and_them)
5548         tem = emit_conditional_move (target, code, op0, op1, mode,
5549                                      tem, const0_rtx, GET_MODE (tem), 0);
5550       else
5551         tem = emit_conditional_move (target, code, op0, op1, mode,
5552                                      trueval, tem, GET_MODE (tem), 0);
5553
5554       if (tem == 0)
5555         delete_insns_since (last);
5556       return tem;
5557     }
5558
5559   /* The remaining tricks only apply to integer comparisons.  */
5560
5561   if (GET_MODE_CLASS (mode) != MODE_INT)
5562     return 0;
5563
5564   /* If this is an equality comparison of integers, we can try to exclusive-or
5565      (or subtract) the two operands and use a recursive call to try the
5566      comparison with zero.  Don't do any of these cases if branches are
5567      very cheap.  */
5568
5569   if ((code == EQ || code == NE) && op1 != const0_rtx)
5570     {
5571       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5572                           OPTAB_WIDEN);
5573
5574       if (tem == 0)
5575         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5576                             OPTAB_WIDEN);
5577       if (tem != 0)
5578         tem = emit_store_flag (target, code, tem, const0_rtx,
5579                                mode, unsignedp, normalizep);
5580       if (tem != 0)
5581         return tem;
5582
5583       delete_insns_since (last);
5584     }
5585
5586   /* For integer comparisons, try the reverse comparison.  However, for
5587      small X and if we'd have anyway to extend, implementing "X != 0"
5588      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5589   rcode = reverse_condition (code);
5590   if (can_compare_p (rcode, mode, ccp_store_flag)
5591       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5592             && code == NE
5593             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5594             && op1 == const0_rtx))
5595     {
5596       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5597                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5598
5599       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5600       if (want_add
5601           && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5602                        optimize_insn_for_speed_p ()) == 0)
5603         {
5604           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5605                                    STORE_FLAG_VALUE, target_mode);
5606           if (tem != 0)
5607             tem = expand_binop (target_mode, add_optab, tem,
5608                                 gen_int_mode (normalizep, target_mode),
5609                                 target, 0, OPTAB_WIDEN);
5610         }
5611       else if (!want_add
5612                && rtx_cost (trueval, mode, XOR, 1,
5613                             optimize_insn_for_speed_p ()) == 0)
5614         {
5615           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5616                                    normalizep, target_mode);
5617           if (tem != 0)
5618             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5619                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5620         }
5621
5622       if (tem != 0)
5623         return tem;
5624       delete_insns_since (last);
5625     }
5626
5627   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5628      the constant zero.  Reject all other comparisons at this point.  Only
5629      do LE and GT if branches are expensive since they are expensive on
5630      2-operand machines.  */
5631
5632   if (op1 != const0_rtx
5633       || (code != EQ && code != NE
5634           && (BRANCH_COST (optimize_insn_for_speed_p (),
5635                            false) <= 1 || (code != LE && code != GT))))
5636     return 0;
5637
5638   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5639      do the necessary operation below.  */
5640
5641   tem = 0;
5642
5643   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5644      the sign bit set.  */
5645
5646   if (code == LE)
5647     {
5648       /* This is destructive, so SUBTARGET can't be OP0.  */
5649       if (rtx_equal_p (subtarget, op0))
5650         subtarget = 0;
5651
5652       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5653                           OPTAB_WIDEN);
5654       if (tem)
5655         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5656                             OPTAB_WIDEN);
5657     }
5658
5659   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5660      number of bits in the mode of OP0, minus one.  */
5661
5662   if (code == GT)
5663     {
5664       if (rtx_equal_p (subtarget, op0))
5665         subtarget = 0;
5666
5667       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5668                           GET_MODE_BITSIZE (mode) - 1,
5669                           subtarget, 0);
5670       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5671                           OPTAB_WIDEN);
5672     }
5673
5674   if (code == EQ || code == NE)
5675     {
5676       /* For EQ or NE, one way to do the comparison is to apply an operation
5677          that converts the operand into a positive number if it is nonzero
5678          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5679          for NE we negate.  This puts the result in the sign bit.  Then we
5680          normalize with a shift, if needed.
5681
5682          Two operations that can do the above actions are ABS and FFS, so try
5683          them.  If that doesn't work, and MODE is smaller than a full word,
5684          we can use zero-extension to the wider mode (an unsigned conversion)
5685          as the operation.  */
5686
5687       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5688          that is compensated by the subsequent overflow when subtracting
5689          one / negating.  */
5690
5691       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5692         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5693       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5694         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5695       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5696         {
5697           tem = convert_modes (word_mode, mode, op0, 1);
5698           mode = word_mode;
5699         }
5700
5701       if (tem != 0)
5702         {
5703           if (code == EQ)
5704             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5705                                 0, OPTAB_WIDEN);
5706           else
5707             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5708         }
5709
5710       /* If we couldn't do it that way, for NE we can "or" the two's complement
5711          of the value with itself.  For EQ, we take the one's complement of
5712          that "or", which is an extra insn, so we only handle EQ if branches
5713          are expensive.  */
5714
5715       if (tem == 0
5716           && (code == NE
5717               || BRANCH_COST (optimize_insn_for_speed_p (),
5718                               false) > 1))
5719         {
5720           if (rtx_equal_p (subtarget, op0))
5721             subtarget = 0;
5722
5723           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5724           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5725                               OPTAB_WIDEN);
5726
5727           if (tem && code == EQ)
5728             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5729         }
5730     }
5731
5732   if (tem && normalizep)
5733     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5734                         GET_MODE_BITSIZE (mode) - 1,
5735                         subtarget, normalizep == 1);
5736
5737   if (tem)
5738     {
5739       if (!target)
5740         ;
5741       else if (GET_MODE (tem) != target_mode)
5742         {
5743           convert_move (target, tem, 0);
5744           tem = target;
5745         }
5746       else if (!subtarget)
5747         {
5748           emit_move_insn (target, tem);
5749           tem = target;
5750         }
5751     }
5752   else
5753     delete_insns_since (last);
5754
5755   return tem;
5756 }
5757
5758 /* Like emit_store_flag, but always succeeds.  */
5759
5760 rtx
5761 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5762                        machine_mode mode, int unsignedp, int normalizep)
5763 {
5764   rtx tem;
5765   rtx_code_label *label;
5766   rtx trueval, falseval;
5767
5768   /* First see if emit_store_flag can do the job.  */
5769   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5770   if (tem != 0)
5771     return tem;
5772
5773   if (!target)
5774     target = gen_reg_rtx (word_mode);
5775
5776   /* If this failed, we have to do this with set/compare/jump/set code.
5777      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5778   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5779   if (code == NE
5780       && GET_MODE_CLASS (mode) == MODE_INT
5781       && REG_P (target)
5782       && op0 == target
5783       && op1 == const0_rtx)
5784     {
5785       label = gen_label_rtx ();
5786       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5787                                NULL_RTX, NULL, label, -1);
5788       emit_move_insn (target, trueval);
5789       emit_label (label);
5790       return target;
5791     }
5792
5793   if (!REG_P (target)
5794       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5795     target = gen_reg_rtx (GET_MODE (target));
5796
5797   /* Jump in the right direction if the target cannot implement CODE
5798      but can jump on its reverse condition.  */
5799   falseval = const0_rtx;
5800   if (! can_compare_p (code, mode, ccp_jump)
5801       && (! FLOAT_MODE_P (mode)
5802           || code == ORDERED || code == UNORDERED
5803           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5804           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5805     {
5806       enum rtx_code rcode;
5807       if (FLOAT_MODE_P (mode))
5808         rcode = reverse_condition_maybe_unordered (code);
5809       else
5810         rcode = reverse_condition (code);
5811
5812       /* Canonicalize to UNORDERED for the libcall.  */
5813       if (can_compare_p (rcode, mode, ccp_jump)
5814           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5815         {
5816           falseval = trueval;
5817           trueval = const0_rtx;
5818           code = rcode;
5819         }
5820     }
5821
5822   emit_move_insn (target, trueval);
5823   label = gen_label_rtx ();
5824   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5825                            label, -1);
5826
5827   emit_move_insn (target, falseval);
5828   emit_label (label);
5829
5830   return target;
5831 }
5832 \f
5833 /* Perform possibly multi-word comparison and conditional jump to LABEL
5834    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5835    now a thin wrapper around do_compare_rtx_and_jump.  */
5836
5837 static void
5838 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5839                  rtx_code_label *label)
5840 {
5841   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5842   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
5843                            NULL, label, -1);
5844 }