gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2015 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "backend.h"
  26 #include "target.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "predict.h"
  30 #include "df.h"
  31 #include "tm_p.h"
  32 #include "expmed.h"
  33 #include "optabs.h"
  34 #include "emit-rtl.h"
  35 #include "recog.h"
  36 #include "diagnostic-core.h"
  37 #include "alias.h"
  38 #include "fold-const.h"
  39 #include "stor-layout.h"
  40 #include "flags.h"
  41 #include "dojump.h"
  42 #include "explow.h"
  43 #include "calls.h"
  44 #include "varasm.h"
  45 #include "stmt.h"
  46 #include "expr.h"
  47 #include "langhooks.h"
  48
  49 struct target_expmed default_target_expmed;
  50 #if SWITCHABLE_TARGET
  51 struct target_expmed *this_target_expmed = &default_target_expmed;
  52 #endif
  53
  54 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  55                                    unsigned HOST_WIDE_INT,
  56                                    unsigned HOST_WIDE_INT,
  57                                    unsigned HOST_WIDE_INT,
  58                                    rtx);
  59 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
  60                                      unsigned HOST_WIDE_INT,
  61                                      rtx);
  62 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  63                                    unsigned HOST_WIDE_INT,
  64                                    unsigned HOST_WIDE_INT,
  65                                    unsigned HOST_WIDE_INT,
  66                                    rtx);
  67 static rtx extract_fixed_bit_field (machine_mode, rtx,
  68                                     unsigned HOST_WIDE_INT,
  69                                     unsigned HOST_WIDE_INT, rtx, int);
  70 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
  71                                       unsigned HOST_WIDE_INT,
  72                                       unsigned HOST_WIDE_INT, rtx, int);
  73 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
  74 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  75                                     unsigned HOST_WIDE_INT, int);
  76 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
  77 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  78 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  79
  80 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  81    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  82    The mask is truncated if necessary to the width of mode MODE.  The
  83    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
  84
  85 static inline rtx
  86 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
  87 {
  88   return immed_wide_int_const
  89     (wi::shifted_mask (bitpos, bitsize, complement,
  90                        GET_MODE_PRECISION (mode)), mode);
  91 }
  92
  93 /* Test whether a value is zero of a power of two.  */
  94 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  95   (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
  96
  97 struct init_expmed_rtl
  98 {
  99   rtx reg;
 100   rtx plus;
 101   rtx neg;
 102   rtx mult;
 103   rtx sdiv;
 104   rtx udiv;
 105   rtx sdiv_32;
 106   rtx smod_32;
 107   rtx wide_mult;
 108   rtx wide_lshr;
 109   rtx wide_trunc;
 110   rtx shift;
 111   rtx shift_mult;
 112   rtx shift_add;
 113   rtx shift_sub0;
 114   rtx shift_sub1;
 115   rtx zext;
 116   rtx trunc;
 117
 118   rtx pow2[MAX_BITS_PER_WORD];
 119   rtx cint[MAX_BITS_PER_WORD];
 120 };
 121
 122 static void
 123 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
 124                       machine_mode from_mode, bool speed)
 125 {
 126   int to_size, from_size;
 127   rtx which;
 128
 129   to_size = GET_MODE_PRECISION (to_mode);
 130   from_size = GET_MODE_PRECISION (from_mode);
 131
 132   /* Most partial integers have a precision less than the "full"
 133      integer it requires for storage.  In case one doesn't, for
 134      comparison purposes here, reduce the bit size by one in that
 135      case.  */
 136   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 137       && exact_log2 (to_size) != -1)
 138     to_size --;
 139   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 140       && exact_log2 (from_size) != -1)
 141     from_size --;
 142
 143   /* Assume cost of zero-extend and sign-extend is the same.  */
 144   which = (to_size < from_size ? all->trunc : all->zext);
 145
 146   PUT_MODE (all->reg, from_mode);
 147   set_convert_cost (to_mode, from_mode, speed,
 148                     set_src_cost (which, to_mode, speed));
 149 }
 150
 151 static void
 152 init_expmed_one_mode (struct init_expmed_rtl *all,
 153                       machine_mode mode, int speed)
 154 {
 155   int m, n, mode_bitsize;
 156   machine_mode mode_from;
 157
 158   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 159
 160   PUT_MODE (all->reg, mode);
 161   PUT_MODE (all->plus, mode);
 162   PUT_MODE (all->neg, mode);
 163   PUT_MODE (all->mult, mode);
 164   PUT_MODE (all->sdiv, mode);
 165   PUT_MODE (all->udiv, mode);
 166   PUT_MODE (all->sdiv_32, mode);
 167   PUT_MODE (all->smod_32, mode);
 168   PUT_MODE (all->wide_trunc, mode);
 169   PUT_MODE (all->shift, mode);
 170   PUT_MODE (all->shift_mult, mode);
 171   PUT_MODE (all->shift_add, mode);
 172   PUT_MODE (all->shift_sub0, mode);
 173   PUT_MODE (all->shift_sub1, mode);
 174   PUT_MODE (all->zext, mode);
 175   PUT_MODE (all->trunc, mode);
 176
 177   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
 178   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
 179   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
 180   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
 181   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
 182
 183   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
 184                                      <= 2 * add_cost (speed, mode)));
 185   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
 186                                      <= 4 * add_cost (speed, mode)));
 187
 188   set_shift_cost (speed, mode, 0, 0);
 189   {
 190     int cost = add_cost (speed, mode);
 191     set_shiftadd_cost (speed, mode, 0, cost);
 192     set_shiftsub0_cost (speed, mode, 0, cost);
 193     set_shiftsub1_cost (speed, mode, 0, cost);
 194   }
 195
 196   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 197   for (m = 1; m < n; m++)
 198     {
 199       XEXP (all->shift, 1) = all->cint[m];
 200       XEXP (all->shift_mult, 1) = all->pow2[m];
 201
 202       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
 203       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
 204                                                        speed));
 205       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
 206                                                         speed));
 207       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
 208                                                         speed));
 209     }
 210
 211   if (SCALAR_INT_MODE_P (mode))
 212     {
 213       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 214            mode_from = (machine_mode)(mode_from + 1))
 215         init_expmed_one_conv (all, mode, mode_from, speed);
 216     }
 217   if (GET_MODE_CLASS (mode) == MODE_INT)
 218     {
 219       machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
 220       if (wider_mode != VOIDmode)
 221         {
 222           PUT_MODE (all->zext, wider_mode);
 223           PUT_MODE (all->wide_mult, wider_mode);
 224           PUT_MODE (all->wide_lshr, wider_mode);
 225           XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
 226
 227           set_mul_widen_cost (speed, wider_mode,
 228                               set_src_cost (all->wide_mult, wider_mode, speed));
 229           set_mul_highpart_cost (speed, mode,
 230                                  set_src_cost (all->wide_trunc, mode, speed));
 231         }
 232     }
 233 }
 234
 235 void
 236 init_expmed (void)
 237 {
 238   struct init_expmed_rtl all;
 239   machine_mode mode = QImode;
 240   int m, speed;
 241
 242   memset (&all, 0, sizeof all);
 243   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 244     {
 245       all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 246       all.cint[m] = GEN_INT (m);
 247     }
 248
 249   /* Avoid using hard regs in ways which may be unsupported.  */
 250   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 251   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 252   all.neg = gen_rtx_NEG (mode, all.reg);
 253   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 254   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 255   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 256   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 257   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 258   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 259   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 260   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 261   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 262   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 263   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 264   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 265   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 266   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 267   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 268
 269   for (speed = 0; speed < 2; speed++)
 270     {
 271       crtl->maybe_hot_insn_p = speed;
 272       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
 273
 274       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 275            mode = (machine_mode)(mode + 1))
 276         init_expmed_one_mode (&all, mode, speed);
 277
 278       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 279         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 280              mode = (machine_mode)(mode + 1))
 281           init_expmed_one_mode (&all, mode, speed);
 282
 283       if (MIN_MODE_VECTOR_INT != VOIDmode)
 284         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 285              mode = (machine_mode)(mode + 1))
 286           init_expmed_one_mode (&all, mode, speed);
 287     }
 288
 289   if (alg_hash_used_p ())
 290     {
 291       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 292       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 293     }
 294   else
 295     set_alg_hash_used_p (true);
 296   default_rtl_profile ();
 297
 298   ggc_free (all.trunc);
 299   ggc_free (all.shift_sub1);
 300   ggc_free (all.shift_sub0);
 301   ggc_free (all.shift_add);
 302   ggc_free (all.shift_mult);
 303   ggc_free (all.shift);
 304   ggc_free (all.wide_trunc);
 305   ggc_free (all.wide_lshr);
 306   ggc_free (all.wide_mult);
 307   ggc_free (all.zext);
 308   ggc_free (all.smod_32);
 309   ggc_free (all.sdiv_32);
 310   ggc_free (all.udiv);
 311   ggc_free (all.sdiv);
 312   ggc_free (all.mult);
 313   ggc_free (all.neg);
 314   ggc_free (all.plus);
 315   ggc_free (all.reg);
 316 }
 317
 318 /* Return an rtx representing minus the value of X.
 319    MODE is the intended mode of the result,
 320    useful if X is a CONST_INT.  */
 321
 322 rtx
 323 negate_rtx (machine_mode mode, rtx x)
 324 {
 325   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 326
 327   if (result == 0)
 328     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 329
 330   return result;
 331 }
 332
 333 /* Adjust bitfield memory MEM so that it points to the first unit of mode
 334    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
 335    If MODE is BLKmode, return a reference to every byte in the bitfield.
 336    Set *NEW_BITNUM to the bit position of the field within the new memory.  */
 337
 338 static rtx
 339 narrow_bit_field_mem (rtx mem, machine_mode mode,
 340                       unsigned HOST_WIDE_INT bitsize,
 341                       unsigned HOST_WIDE_INT bitnum,
 342                       unsigned HOST_WIDE_INT *new_bitnum)
 343 {
 344   if (mode == BLKmode)
 345     {
 346       *new_bitnum = bitnum % BITS_PER_UNIT;
 347       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 348       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 349                             / BITS_PER_UNIT);
 350       return adjust_bitfield_address_size (mem, mode, offset, size);
 351     }
 352   else
 353     {
 354       unsigned int unit = GET_MODE_BITSIZE (mode);
 355       *new_bitnum = bitnum % unit;
 356       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 357       return adjust_bitfield_address (mem, mode, offset);
 358     }
 359 }
 360
 361 /* The caller wants to perform insertion or extraction PATTERN on a
 362    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 363    BITREGION_START and BITREGION_END are as for store_bit_field
 364    and FIELDMODE is the natural mode of the field.
 365
 366    Search for a mode that is compatible with the memory access
 367    restrictions and (where applicable) with a register insertion or
 368    extraction.  Return the new memory on success, storing the adjusted
 369    bit position in *NEW_BITNUM.  Return null otherwise.  */
 370
 371 static rtx
 372 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 373                               rtx op0, HOST_WIDE_INT bitsize,
 374                               HOST_WIDE_INT bitnum,
 375                               unsigned HOST_WIDE_INT bitregion_start,
 376                               unsigned HOST_WIDE_INT bitregion_end,
 377                               machine_mode fieldmode,
 378                               unsigned HOST_WIDE_INT *new_bitnum)
 379 {
 380   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 381                                 bitregion_end, MEM_ALIGN (op0),
 382                                 MEM_VOLATILE_P (op0));
 383   machine_mode best_mode;
 384   if (iter.next_mode (&best_mode))
 385     {
 386       /* We can use a memory in BEST_MODE.  See whether this is true for
 387          any wider modes.  All other things being equal, we prefer to
 388          use the widest mode possible because it tends to expose more
 389          CSE opportunities.  */
 390       if (!iter.prefer_smaller_modes ())
 391         {
 392           /* Limit the search to the mode required by the corresponding
 393              register insertion or extraction instruction, if any.  */
 394           machine_mode limit_mode = word_mode;
 395           extraction_insn insn;
 396           if (get_best_reg_extraction_insn (&insn, pattern,
 397                                             GET_MODE_BITSIZE (best_mode),
 398                                             fieldmode))
 399             limit_mode = insn.field_mode;
 400
 401           machine_mode wider_mode;
 402           while (iter.next_mode (&wider_mode)
 403                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 404             best_mode = wider_mode;
 405         }
 406       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 407                                    new_bitnum);
 408     }
 409   return NULL_RTX;
 410 }
 411
 412 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 413    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 414    offset is then BITNUM / BITS_PER_UNIT.  */
 415
 416 static bool
 417 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
 418                      unsigned HOST_WIDE_INT bitsize,
 419                      machine_mode struct_mode)
 420 {
 421   if (BYTES_BIG_ENDIAN)
 422     return (bitnum % BITS_PER_UNIT == 0
 423             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
 424                 || (bitnum + bitsize) % BITS_PER_WORD == 0));
 425   else
 426     return bitnum % BITS_PER_WORD == 0;
 427 }
 428
 429 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 430    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 431    Return false if the access would touch memory outside the range
 432    BITREGION_START to BITREGION_END for conformance to the C++ memory
 433    model.  */
 434
 435 static bool
 436 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 437                             unsigned HOST_WIDE_INT bitnum,
 438                             machine_mode fieldmode,
 439                             unsigned HOST_WIDE_INT bitregion_start,
 440                             unsigned HOST_WIDE_INT bitregion_end)
 441 {
 442   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 443
 444   /* -fstrict-volatile-bitfields must be enabled and we must have a
 445      volatile MEM.  */
 446   if (!MEM_P (op0)
 447       || !MEM_VOLATILE_P (op0)
 448       || flag_strict_volatile_bitfields <= 0)
 449     return false;
 450
 451   /* Non-integral modes likely only happen with packed structures.
 452      Punt.  */
 453   if (!SCALAR_INT_MODE_P (fieldmode))
 454     return false;
 455
 456   /* The bit size must not be larger than the field mode, and
 457      the field mode must not be larger than a word.  */
 458   if (bitsize > modesize || modesize > BITS_PER_WORD)
 459     return false;
 460
 461   /* Check for cases of unaligned fields that must be split.  */
 462   if (bitnum % modesize + bitsize > modesize)
 463     return false;
 464
 465   /* The memory must be sufficiently aligned for a MODESIZE access.
 466      This condition guarantees, that the memory access will not
 467      touch anything after the end of the structure.  */
 468   if (MEM_ALIGN (op0) < modesize)
 469     return false;
 470
 471   /* Check for cases where the C++ memory model applies.  */
 472   if (bitregion_end != 0
 473       && (bitnum - bitnum % modesize < bitregion_start
 474           || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
 475     return false;
 476
 477   return true;
 478 }
 479
 480 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 481    bit number BITNUM can be treated as a simple value of mode MODE.  */
 482
 483 static bool
 484 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 485                        unsigned HOST_WIDE_INT bitnum, machine_mode mode)
 486 {
 487   return (MEM_P (op0)
 488           && bitnum % BITS_PER_UNIT == 0
 489           && bitsize == GET_MODE_BITSIZE (mode)
 490           && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
 491               || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
 492                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 493 }
 494 \f
 495 /* Try to use instruction INSV to store VALUE into a field of OP0.
 496    BITSIZE and BITNUM are as for store_bit_field.  */
 497
 498 static bool
 499 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 500                             unsigned HOST_WIDE_INT bitsize,
 501                             unsigned HOST_WIDE_INT bitnum,
 502                             rtx value)
 503 {
 504   struct expand_operand ops[4];
 505   rtx value1;
 506   rtx xop0 = op0;
 507   rtx_insn *last = get_last_insn ();
 508   bool copy_back = false;
 509
 510   machine_mode op_mode = insv->field_mode;
 511   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 512   if (bitsize == 0 || bitsize > unit)
 513     return false;
 514
 515   if (MEM_P (xop0))
 516     /* Get a reference to the first byte of the field.  */
 517     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 518                                  &bitnum);
 519   else
 520     {
 521       /* Convert from counting within OP0 to counting in OP_MODE.  */
 522       if (BYTES_BIG_ENDIAN)
 523         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 524
 525       /* If xop0 is a register, we need it in OP_MODE
 526          to make it acceptable to the format of insv.  */
 527       if (GET_CODE (xop0) == SUBREG)
 528         /* We can't just change the mode, because this might clobber op0,
 529            and we will need the original value of op0 if insv fails.  */
 530         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 531       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 532         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 533     }
 534
 535   /* If the destination is a paradoxical subreg such that we need a
 536      truncate to the inner mode, perform the insertion on a temporary and
 537      truncate the result to the original destination.  Note that we can't
 538      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 539      X) 0)) is (reg:N X).  */
 540   if (GET_CODE (xop0) == SUBREG
 541       && REG_P (SUBREG_REG (xop0))
 542       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 543                                          op_mode))
 544     {
 545       rtx tem = gen_reg_rtx (op_mode);
 546       emit_move_insn (tem, xop0);
 547       xop0 = tem;
 548       copy_back = true;
 549     }
 550
 551   /* There are similar overflow check at the start of store_bit_field_1,
 552      but that only check the situation where the field lies completely
 553      outside the register, while there do have situation where the field
 554      lies partialy in the register, we need to adjust bitsize for this
 555      partial overflow situation.  Without this fix, pr48335-2.c on big-endian
 556      will broken on those arch support bit insert instruction, like arm, aarch64
 557      etc.  */
 558   if (bitsize + bitnum > unit && bitnum < unit)
 559     {
 560       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
 561                "destination object, data truncated into %wu-bit",
 562                bitsize, unit - bitnum);
 563       bitsize = unit - bitnum;
 564     }
 565
 566   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 567      "backwards" from the size of the unit we are inserting into.
 568      Otherwise, we count bits from the most significant on a
 569      BYTES/BITS_BIG_ENDIAN machine.  */
 570
 571   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 572     bitnum = unit - bitsize - bitnum;
 573
 574   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 575   value1 = value;
 576   if (GET_MODE (value) != op_mode)
 577     {
 578       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 579         {
 580           /* Optimization: Don't bother really extending VALUE
 581              if it has all the bits we will actually use.  However,
 582              if we must narrow it, be sure we do it correctly.  */
 583
 584           if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 585             {
 586               rtx tmp;
 587
 588               tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 589               if (! tmp)
 590                 tmp = simplify_gen_subreg (op_mode,
 591                                            force_reg (GET_MODE (value),
 592                                                       value1),
 593                                            GET_MODE (value), 0);
 594               value1 = tmp;
 595             }
 596           else
 597             value1 = gen_lowpart (op_mode, value1);
 598         }
 599       else if (CONST_INT_P (value))
 600         value1 = gen_int_mode (INTVAL (value), op_mode);
 601       else
 602         /* Parse phase is supposed to make VALUE's data type
 603            match that of the component reference, which is a type
 604            at least as wide as the field; so VALUE should have
 605            a mode that corresponds to that type.  */
 606         gcc_assert (CONSTANT_P (value));
 607     }
 608
 609   create_fixed_operand (&ops[0], xop0);
 610   create_integer_operand (&ops[1], bitsize);
 611   create_integer_operand (&ops[2], bitnum);
 612   create_input_operand (&ops[3], value1, op_mode);
 613   if (maybe_expand_insn (insv->icode, 4, ops))
 614     {
 615       if (copy_back)
 616         convert_move (op0, xop0, true);
 617       return true;
 618     }
 619   delete_insns_since (last);
 620   return false;
 621 }
 622
 623 /* A subroutine of store_bit_field, with the same arguments.  Return true
 624    if the operation could be implemented.
 625
 626    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 627    no other way of implementing the operation.  If FALLBACK_P is false,
 628    return false instead.  */
 629
 630 static bool
 631 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 632                    unsigned HOST_WIDE_INT bitnum,
 633                    unsigned HOST_WIDE_INT bitregion_start,
 634                    unsigned HOST_WIDE_INT bitregion_end,
 635                    machine_mode fieldmode,
 636                    rtx value, bool fallback_p)
 637 {
 638   rtx op0 = str_rtx;
 639   rtx orig_value;
 640
 641   while (GET_CODE (op0) == SUBREG)
 642     {
 643       /* The following line once was done only if WORDS_BIG_ENDIAN,
 644          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 645          meaningful at a much higher level; when structures are copied
 646          between memory and regs, the higher-numbered regs
 647          always get higher addresses.  */
 648       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 649       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 650       int byte_offset = 0;
 651
 652       /* Paradoxical subregs need special handling on big endian machines.  */
 653       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 654         {
 655           int difference = inner_mode_size - outer_mode_size;
 656
 657           if (WORDS_BIG_ENDIAN)
 658             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 659           if (BYTES_BIG_ENDIAN)
 660             byte_offset += difference % UNITS_PER_WORD;
 661         }
 662       else
 663         byte_offset = SUBREG_BYTE (op0);
 664
 665       bitnum += byte_offset * BITS_PER_UNIT;
 666       op0 = SUBREG_REG (op0);
 667     }
 668
 669   /* No action is needed if the target is a register and if the field
 670      lies completely outside that register.  This can occur if the source
 671      code contains an out-of-bounds access to a small array.  */
 672   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 673     return true;
 674
 675   /* Use vec_set patterns for inserting parts of vectors whenever
 676      available.  */
 677   if (VECTOR_MODE_P (GET_MODE (op0))
 678       && !MEM_P (op0)
 679       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 680       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 681       && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
 682       && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
 683     {
 684       struct expand_operand ops[3];
 685       machine_mode outermode = GET_MODE (op0);
 686       machine_mode innermode = GET_MODE_INNER (outermode);
 687       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 688       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 689
 690       create_fixed_operand (&ops[0], op0);
 691       create_input_operand (&ops[1], value, innermode);
 692       create_integer_operand (&ops[2], pos);
 693       if (maybe_expand_insn (icode, 3, ops))
 694         return true;
 695     }
 696
 697   /* If the target is a register, overwriting the entire object, or storing
 698      a full-word or multi-word field can be done with just a SUBREG.  */
 699   if (!MEM_P (op0)
 700       && bitsize == GET_MODE_BITSIZE (fieldmode)
 701       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 702           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 703     {
 704       /* Use the subreg machinery either to narrow OP0 to the required
 705          words or to cope with mode punning between equal-sized modes.
 706          In the latter case, use subreg on the rhs side, not lhs.  */
 707       rtx sub;
 708
 709       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 710         {
 711           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 712           if (sub)
 713             {
 714               emit_move_insn (op0, sub);
 715               return true;
 716             }
 717         }
 718       else
 719         {
 720           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 721                                      bitnum / BITS_PER_UNIT);
 722           if (sub)
 723             {
 724               emit_move_insn (sub, value);
 725               return true;
 726             }
 727         }
 728     }
 729
 730   /* If the target is memory, storing any naturally aligned field can be
 731      done with a simple store.  For targets that support fast unaligned
 732      memory, any naturally sized, unit aligned field can be done directly.  */
 733   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
 734     {
 735       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 736       emit_move_insn (op0, value);
 737       return true;
 738     }
 739
 740   /* Make sure we are playing with integral modes.  Pun with subregs
 741      if we aren't.  This must come after the entire register case above,
 742      since that case is valid for any mode.  The following cases are only
 743      valid for integral modes.  */
 744   {
 745     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 746     if (imode != GET_MODE (op0))
 747       {
 748         if (MEM_P (op0))
 749           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
 750         else
 751           {
 752             gcc_assert (imode != BLKmode);
 753             op0 = gen_lowpart (imode, op0);
 754           }
 755       }
 756   }
 757
 758   /* Storing an lsb-aligned field in a register
 759      can be done with a movstrict instruction.  */
 760
 761   if (!MEM_P (op0)
 762       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 763       && bitsize == GET_MODE_BITSIZE (fieldmode)
 764       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 765     {
 766       struct expand_operand ops[2];
 767       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 768       rtx arg0 = op0;
 769       unsigned HOST_WIDE_INT subreg_off;
 770
 771       if (GET_CODE (arg0) == SUBREG)
 772         {
 773           /* Else we've got some float mode source being extracted into
 774              a different float mode destination -- this combination of
 775              subregs results in Severe Tire Damage.  */
 776           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 777                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 778                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 779           arg0 = SUBREG_REG (arg0);
 780         }
 781
 782       subreg_off = bitnum / BITS_PER_UNIT;
 783       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 784         {
 785           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 786
 787           create_fixed_operand (&ops[0], arg0);
 788           /* Shrink the source operand to FIELDMODE.  */
 789           create_convert_operand_to (&ops[1], value, fieldmode, false);
 790           if (maybe_expand_insn (icode, 2, ops))
 791             return true;
 792         }
 793     }
 794
 795   /* Handle fields bigger than a word.  */
 796
 797   if (bitsize > BITS_PER_WORD)
 798     {
 799       /* Here we transfer the words of the field
 800          in the order least significant first.
 801          This is because the most significant word is the one which may
 802          be less than full.
 803          However, only do that if the value is not BLKmode.  */
 804
 805       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 806       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 807       unsigned int i;
 808       rtx_insn *last;
 809
 810       /* This is the mode we must force value to, so that there will be enough
 811          subwords to extract.  Note that fieldmode will often (always?) be
 812          VOIDmode, because that is what store_field uses to indicate that this
 813          is a bit field, but passing VOIDmode to operand_subword_force
 814          is not allowed.  */
 815       fieldmode = GET_MODE (value);
 816       if (fieldmode == VOIDmode)
 817         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 818
 819       last = get_last_insn ();
 820       for (i = 0; i < nwords; i++)
 821         {
 822           /* If I is 0, use the low-order word in both field and target;
 823              if I is 1, use the next to lowest word; and so on.  */
 824           unsigned int wordnum = (backwards
 825                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 826                                   - i - 1
 827                                   : i);
 828           unsigned int bit_offset = (backwards
 829                                      ? MAX ((int) bitsize - ((int) i + 1)
 830                                             * BITS_PER_WORD,
 831                                             0)
 832                                      : (int) i * BITS_PER_WORD);
 833           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 834           unsigned HOST_WIDE_INT new_bitsize =
 835             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 836
 837           /* If the remaining chunk doesn't have full wordsize we have
 838              to make sure that for big endian machines the higher order
 839              bits are used.  */
 840           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 841             value_word = simplify_expand_binop (word_mode, lshr_optab,
 842                                                 value_word,
 843                                                 GEN_INT (BITS_PER_WORD
 844                                                          - new_bitsize),
 845                                                 NULL_RTX, true,
 846                                                 OPTAB_LIB_WIDEN);
 847
 848           if (!store_bit_field_1 (op0, new_bitsize,
 849                                   bitnum + bit_offset,
 850                                   bitregion_start, bitregion_end,
 851                                   word_mode,
 852                                   value_word, fallback_p))
 853             {
 854               delete_insns_since (last);
 855               return false;
 856             }
 857         }
 858       return true;
 859     }
 860
 861   /* If VALUE has a floating-point or complex mode, access it as an
 862      integer of the corresponding size.  This can occur on a machine
 863      with 64 bit registers that uses SFmode for float.  It can also
 864      occur for unaligned float or complex fields.  */
 865   orig_value = value;
 866   if (GET_MODE (value) != VOIDmode
 867       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 868       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 869     {
 870       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 871       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 872     }
 873
 874   /* If OP0 is a multi-word register, narrow it to the affected word.
 875      If the region spans two words, defer to store_split_bit_field.  */
 876   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 877     {
 878       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 879                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 880       gcc_assert (op0);
 881       bitnum %= BITS_PER_WORD;
 882       if (bitnum + bitsize > BITS_PER_WORD)
 883         {
 884           if (!fallback_p)
 885             return false;
 886
 887           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 888                                  bitregion_end, value);
 889           return true;
 890         }
 891     }
 892
 893   /* From here on we can assume that the field to be stored in fits
 894      within a word.  If the destination is a register, it too fits
 895      in a word.  */
 896
 897   extraction_insn insv;
 898   if (!MEM_P (op0)
 899       && get_best_reg_extraction_insn (&insv, EP_insv,
 900                                        GET_MODE_BITSIZE (GET_MODE (op0)),
 901                                        fieldmode)
 902       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 903     return true;
 904
 905   /* If OP0 is a memory, try copying it to a register and seeing if a
 906      cheap register alternative is available.  */
 907   if (MEM_P (op0))
 908     {
 909       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
 910                                         fieldmode)
 911           && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 912         return true;
 913
 914       rtx_insn *last = get_last_insn ();
 915
 916       /* Try loading part of OP0 into a register, inserting the bitfield
 917          into that, and then copying the result back to OP0.  */
 918       unsigned HOST_WIDE_INT bitpos;
 919       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
 920                                                bitregion_start, bitregion_end,
 921                                                fieldmode, &bitpos);
 922       if (xop0)
 923         {
 924           rtx tempreg = copy_to_reg (xop0);
 925           if (store_bit_field_1 (tempreg, bitsize, bitpos,
 926                                  bitregion_start, bitregion_end,
 927                                  fieldmode, orig_value, false))
 928             {
 929               emit_move_insn (xop0, tempreg);
 930               return true;
 931             }
 932           delete_insns_since (last);
 933         }
 934     }
 935
 936   if (!fallback_p)
 937     return false;
 938
 939   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
 940                          bitregion_end, value);
 941   return true;
 942 }
 943
 944 /* Generate code to store value from rtx VALUE
 945    into a bit-field within structure STR_RTX
 946    containing BITSIZE bits starting at bit BITNUM.
 947
 948    BITREGION_START is bitpos of the first bitfield in this region.
 949    BITREGION_END is the bitpos of the ending bitfield in this region.
 950    These two fields are 0, if the C++ memory model does not apply,
 951    or we are not interested in keeping track of bitfield regions.
 952
 953    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */
 954
 955 void
 956 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 957                  unsigned HOST_WIDE_INT bitnum,
 958                  unsigned HOST_WIDE_INT bitregion_start,
 959                  unsigned HOST_WIDE_INT bitregion_end,
 960                  machine_mode fieldmode,
 961                  rtx value)
 962 {
 963   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
 964   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
 965                                   bitregion_start, bitregion_end))
 966     {
 967       /* Storing of a full word can be done with a simple store.
 968          We know here that the field can be accessed with one single
 969          instruction.  For targets that support unaligned memory,
 970          an unaligned access may be necessary.  */
 971       if (bitsize == GET_MODE_BITSIZE (fieldmode))
 972         {
 973           str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
 974                                              bitnum / BITS_PER_UNIT);
 975           gcc_assert (bitnum % BITS_PER_UNIT == 0);
 976           emit_move_insn (str_rtx, value);
 977         }
 978       else
 979         {
 980           rtx temp;
 981
 982           str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
 983                                           &bitnum);
 984           gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
 985           temp = copy_to_reg (str_rtx);
 986           if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
 987                                   fieldmode, value, true))
 988             gcc_unreachable ();
 989
 990           emit_move_insn (str_rtx, temp);
 991         }
 992
 993       return;
 994     }
 995
 996   /* Under the C++0x memory model, we must not touch bits outside the
 997      bit region.  Adjust the address to start at the beginning of the
 998      bit region.  */
 999   if (MEM_P (str_rtx) && bitregion_start > 0)
1000     {
1001       machine_mode bestmode;
1002       HOST_WIDE_INT offset, size;
1003
1004       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1005
1006       offset = bitregion_start / BITS_PER_UNIT;
1007       bitnum -= bitregion_start;
1008       size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1009       bitregion_end -= bitregion_start;
1010       bitregion_start = 0;
1011       bestmode = get_best_mode (bitsize, bitnum,
1012                                 bitregion_start, bitregion_end,
1013                                 MEM_ALIGN (str_rtx), VOIDmode,
1014                                 MEM_VOLATILE_P (str_rtx));
1015       str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1016     }
1017
1018   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1019                           bitregion_start, bitregion_end,
1020                           fieldmode, value, true))
1021     gcc_unreachable ();
1022 }
1023 \f
1024 /* Use shifts and boolean operations to store VALUE into a bit field of
1025    width BITSIZE in OP0, starting at bit BITNUM.  */
1026
1027 static void
1028 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1029                        unsigned HOST_WIDE_INT bitnum,
1030                        unsigned HOST_WIDE_INT bitregion_start,
1031                        unsigned HOST_WIDE_INT bitregion_end,
1032                        rtx value)
1033 {
1034   /* There is a case not handled here:
1035      a structure with a known alignment of just a halfword
1036      and a field split across two aligned halfwords within the structure.
1037      Or likewise a structure with a known alignment of just a byte
1038      and a field split across two bytes.
1039      Such cases are not supposed to be able to occur.  */
1040
1041   if (MEM_P (op0))
1042     {
1043       machine_mode mode = GET_MODE (op0);
1044       if (GET_MODE_BITSIZE (mode) == 0
1045           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1046         mode = word_mode;
1047       mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1048                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1049
1050       if (mode == VOIDmode)
1051         {
1052           /* The only way this should occur is if the field spans word
1053              boundaries.  */
1054           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1055                                  bitregion_end, value);
1056           return;
1057         }
1058
1059       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1060     }
1061
1062   store_fixed_bit_field_1 (op0, bitsize, bitnum, value);
1063 }
1064
1065 /* Helper function for store_fixed_bit_field, stores
1066    the bit field always using the MODE of OP0.  */
1067
1068 static void
1069 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1070                          unsigned HOST_WIDE_INT bitnum,
1071                          rtx value)
1072 {
1073   machine_mode mode;
1074   rtx temp;
1075   int all_zero = 0;
1076   int all_one = 0;
1077
1078   mode = GET_MODE (op0);
1079   gcc_assert (SCALAR_INT_MODE_P (mode));
1080
1081   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1082      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1083
1084   if (BYTES_BIG_ENDIAN)
1085     /* BITNUM is the distance between our msb
1086        and that of the containing datum.
1087        Convert it to the distance from the lsb.  */
1088     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1089
1090   /* Now BITNUM is always the distance between our lsb
1091      and that of OP0.  */
1092
1093   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1094      we must first convert its mode to MODE.  */
1095
1096   if (CONST_INT_P (value))
1097     {
1098       unsigned HOST_WIDE_INT v = UINTVAL (value);
1099
1100       if (bitsize < HOST_BITS_PER_WIDE_INT)
1101         v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;
1102
1103       if (v == 0)
1104         all_zero = 1;
1105       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1106                 && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
1107                || (bitsize == HOST_BITS_PER_WIDE_INT
1108                    && v == (unsigned HOST_WIDE_INT) -1))
1109         all_one = 1;
1110
1111       value = lshift_value (mode, v, bitnum);
1112     }
1113   else
1114     {
1115       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1116                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1117
1118       if (GET_MODE (value) != mode)
1119         value = convert_to_mode (mode, value, 1);
1120
1121       if (must_and)
1122         value = expand_binop (mode, and_optab, value,
1123                               mask_rtx (mode, 0, bitsize, 0),
1124                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1125       if (bitnum > 0)
1126         value = expand_shift (LSHIFT_EXPR, mode, value,
1127                               bitnum, NULL_RTX, 1);
1128     }
1129
1130   /* Now clear the chosen bits in OP0,
1131      except that if VALUE is -1 we need not bother.  */
1132   /* We keep the intermediates in registers to allow CSE to combine
1133      consecutive bitfield assignments.  */
1134
1135   temp = force_reg (mode, op0);
1136
1137   if (! all_one)
1138     {
1139       temp = expand_binop (mode, and_optab, temp,
1140                            mask_rtx (mode, bitnum, bitsize, 1),
1141                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1142       temp = force_reg (mode, temp);
1143     }
1144
1145   /* Now logical-or VALUE into OP0, unless it is zero.  */
1146
1147   if (! all_zero)
1148     {
1149       temp = expand_binop (mode, ior_optab, temp, value,
1150                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1151       temp = force_reg (mode, temp);
1152     }
1153
1154   if (op0 != temp)
1155     {
1156       op0 = copy_rtx (op0);
1157       emit_move_insn (op0, temp);
1158     }
1159 }
1160 \f
1161 /* Store a bit field that is split across multiple accessible memory objects.
1162
1163    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1164    BITSIZE is the field width; BITPOS the position of its first bit
1165    (within the word).
1166    VALUE is the value to store.
1167
1168    This does not yet handle fields wider than BITS_PER_WORD.  */
1169
1170 static void
1171 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1172                        unsigned HOST_WIDE_INT bitpos,
1173                        unsigned HOST_WIDE_INT bitregion_start,
1174                        unsigned HOST_WIDE_INT bitregion_end,
1175                        rtx value)
1176 {
1177   unsigned int unit;
1178   unsigned int bitsdone = 0;
1179
1180   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1181      much at a time.  */
1182   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1183     unit = BITS_PER_WORD;
1184   else
1185     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1186
1187   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1188      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1189      again, and we will mutually recurse forever.  */
1190   if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1191     unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1192
1193   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1194      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1195      that VALUE might be a floating-point constant.  */
1196   if (CONSTANT_P (value) && !CONST_INT_P (value))
1197     {
1198       rtx word = gen_lowpart_common (word_mode, value);
1199
1200       if (word && (value != word))
1201         value = word;
1202       else
1203         value = gen_lowpart_common (word_mode,
1204                                     force_reg (GET_MODE (value) != VOIDmode
1205                                                ? GET_MODE (value)
1206                                                : word_mode, value));
1207     }
1208
1209   while (bitsdone < bitsize)
1210     {
1211       unsigned HOST_WIDE_INT thissize;
1212       rtx part, word;
1213       unsigned HOST_WIDE_INT thispos;
1214       unsigned HOST_WIDE_INT offset;
1215
1216       offset = (bitpos + bitsdone) / unit;
1217       thispos = (bitpos + bitsdone) % unit;
1218
1219       /* When region of bytes we can touch is restricted, decrease
1220          UNIT close to the end of the region as needed.  If op0 is a REG
1221          or SUBREG of REG, don't do this, as there can't be data races
1222          on a register and we can expand shorter code in some cases.  */
1223       if (bitregion_end
1224           && unit > BITS_PER_UNIT
1225           && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1226           && !REG_P (op0)
1227           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1228         {
1229           unit = unit / 2;
1230           continue;
1231         }
1232
1233       /* THISSIZE must not overrun a word boundary.  Otherwise,
1234          store_fixed_bit_field will call us again, and we will mutually
1235          recurse forever.  */
1236       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1237       thissize = MIN (thissize, unit - thispos);
1238
1239       if (BYTES_BIG_ENDIAN)
1240         {
1241           /* Fetch successively less significant portions.  */
1242           if (CONST_INT_P (value))
1243             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1244                              >> (bitsize - bitsdone - thissize))
1245                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1246           else
1247             {
1248               int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1249               /* The args are chosen so that the last part includes the
1250                  lsb.  Give extract_bit_field the value it needs (with
1251                  endianness compensation) to fetch the piece we want.  */
1252               part = extract_fixed_bit_field (word_mode, value, thissize,
1253                                               total_bits - bitsize + bitsdone,
1254                                               NULL_RTX, 1);
1255             }
1256         }
1257       else
1258         {
1259           /* Fetch successively more significant portions.  */
1260           if (CONST_INT_P (value))
1261             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1262                              >> bitsdone)
1263                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1264           else
1265             part = extract_fixed_bit_field (word_mode, value, thissize,
1266                                             bitsdone, NULL_RTX, 1);
1267         }
1268
1269       /* If OP0 is a register, then handle OFFSET here.
1270
1271          When handling multiword bitfields, extract_bit_field may pass
1272          down a word_mode SUBREG of a larger REG for a bitfield that actually
1273          crosses a word boundary.  Thus, for a SUBREG, we must find
1274          the current word starting from the base register.  */
1275       if (GET_CODE (op0) == SUBREG)
1276         {
1277           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1278                             + (offset * unit / BITS_PER_WORD);
1279           machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1280           if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1281             word = word_offset ? const0_rtx : op0;
1282           else
1283             word = operand_subword_force (SUBREG_REG (op0), word_offset,
1284                                           GET_MODE (SUBREG_REG (op0)));
1285           offset &= BITS_PER_WORD / unit - 1;
1286         }
1287       else if (REG_P (op0))
1288         {
1289           machine_mode op0_mode = GET_MODE (op0);
1290           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1291             word = offset ? const0_rtx : op0;
1292           else
1293             word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1294                                           GET_MODE (op0));
1295           offset &= BITS_PER_WORD / unit - 1;
1296         }
1297       else
1298         word = op0;
1299
1300       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1301          it is just an out-of-bounds access.  Ignore it.  */
1302       if (word != const0_rtx)
1303         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1304                                bitregion_start, bitregion_end, part);
1305       bitsdone += thissize;
1306     }
1307 }
1308 \f
1309 /* A subroutine of extract_bit_field_1 that converts return value X
1310    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1311    to extract_bit_field.  */
1312
1313 static rtx
1314 convert_extracted_bit_field (rtx x, machine_mode mode,
1315                              machine_mode tmode, bool unsignedp)
1316 {
1317   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1318     return x;
1319
1320   /* If the x mode is not a scalar integral, first convert to the
1321      integer mode of that size and then access it as a floating-point
1322      value via a SUBREG.  */
1323   if (!SCALAR_INT_MODE_P (tmode))
1324     {
1325       machine_mode smode;
1326
1327       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1328       x = convert_to_mode (smode, x, unsignedp);
1329       x = force_reg (smode, x);
1330       return gen_lowpart (tmode, x);
1331     }
1332
1333   return convert_to_mode (tmode, x, unsignedp);
1334 }
1335
1336 /* Try to use an ext(z)v pattern to extract a field from OP0.
1337    Return the extracted value on success, otherwise return null.
1338    EXT_MODE is the mode of the extraction and the other arguments
1339    are as for extract_bit_field.  */
1340
1341 static rtx
1342 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1343                               unsigned HOST_WIDE_INT bitsize,
1344                               unsigned HOST_WIDE_INT bitnum,
1345                               int unsignedp, rtx target,
1346                               machine_mode mode, machine_mode tmode)
1347 {
1348   struct expand_operand ops[4];
1349   rtx spec_target = target;
1350   rtx spec_target_subreg = 0;
1351   machine_mode ext_mode = extv->field_mode;
1352   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1353
1354   if (bitsize == 0 || unit < bitsize)
1355     return NULL_RTX;
1356
1357   if (MEM_P (op0))
1358     /* Get a reference to the first byte of the field.  */
1359     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1360                                 &bitnum);
1361   else
1362     {
1363       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1364       if (BYTES_BIG_ENDIAN)
1365         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1366
1367       /* If op0 is a register, we need it in EXT_MODE to make it
1368          acceptable to the format of ext(z)v.  */
1369       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1370         return NULL_RTX;
1371       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1372         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1373     }
1374
1375   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1376      "backwards" from the size of the unit we are extracting from.
1377      Otherwise, we count bits from the most significant on a
1378      BYTES/BITS_BIG_ENDIAN machine.  */
1379
1380   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1381     bitnum = unit - bitsize - bitnum;
1382
1383   if (target == 0)
1384     target = spec_target = gen_reg_rtx (tmode);
1385
1386   if (GET_MODE (target) != ext_mode)
1387     {
1388       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1389          between the mode of the extraction (word_mode) and the target
1390          mode.  Instead, create a temporary and use convert_move to set
1391          the target.  */
1392       if (REG_P (target)
1393           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1394         {
1395           target = gen_lowpart (ext_mode, target);
1396           if (GET_MODE_PRECISION (ext_mode)
1397               > GET_MODE_PRECISION (GET_MODE (spec_target)))
1398             spec_target_subreg = target;
1399         }
1400       else
1401         target = gen_reg_rtx (ext_mode);
1402     }
1403
1404   create_output_operand (&ops[0], target, ext_mode);
1405   create_fixed_operand (&ops[1], op0);
1406   create_integer_operand (&ops[2], bitsize);
1407   create_integer_operand (&ops[3], bitnum);
1408   if (maybe_expand_insn (extv->icode, 4, ops))
1409     {
1410       target = ops[0].value;
1411       if (target == spec_target)
1412         return target;
1413       if (target == spec_target_subreg)
1414         return spec_target;
1415       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1416     }
1417   return NULL_RTX;
1418 }
1419
1420 /* A subroutine of extract_bit_field, with the same arguments.
1421    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1422    if we can find no other means of implementing the operation.
1423    if FALLBACK_P is false, return NULL instead.  */
1424
1425 static rtx
1426 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1427                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1428                      machine_mode mode, machine_mode tmode,
1429                      bool fallback_p)
1430 {
1431   rtx op0 = str_rtx;
1432   machine_mode int_mode;
1433   machine_mode mode1;
1434
1435   if (tmode == VOIDmode)
1436     tmode = mode;
1437
1438   while (GET_CODE (op0) == SUBREG)
1439     {
1440       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1441       op0 = SUBREG_REG (op0);
1442     }
1443
1444   /* If we have an out-of-bounds access to a register, just return an
1445      uninitialized register of the required mode.  This can occur if the
1446      source code contains an out-of-bounds access to a small array.  */
1447   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1448     return gen_reg_rtx (tmode);
1449
1450   if (REG_P (op0)
1451       && mode == GET_MODE (op0)
1452       && bitnum == 0
1453       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1454     {
1455       /* We're trying to extract a full register from itself.  */
1456       return op0;
1457     }
1458
1459   /* See if we can get a better vector mode before extracting.  */
1460   if (VECTOR_MODE_P (GET_MODE (op0))
1461       && !MEM_P (op0)
1462       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1463     {
1464       machine_mode new_mode;
1465
1466       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1467         new_mode = MIN_MODE_VECTOR_FLOAT;
1468       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1469         new_mode = MIN_MODE_VECTOR_FRACT;
1470       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1471         new_mode = MIN_MODE_VECTOR_UFRACT;
1472       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1473         new_mode = MIN_MODE_VECTOR_ACCUM;
1474       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1475         new_mode = MIN_MODE_VECTOR_UACCUM;
1476       else
1477         new_mode = MIN_MODE_VECTOR_INT;
1478
1479       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1480         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1481             && targetm.vector_mode_supported_p (new_mode))
1482           break;
1483       if (new_mode != VOIDmode)
1484         op0 = gen_lowpart (new_mode, op0);
1485     }
1486
1487   /* Use vec_extract patterns for extracting parts of vectors whenever
1488      available.  */
1489   if (VECTOR_MODE_P (GET_MODE (op0))
1490       && !MEM_P (op0)
1491       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1492       && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1493           == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1494     {
1495       struct expand_operand ops[3];
1496       machine_mode outermode = GET_MODE (op0);
1497       machine_mode innermode = GET_MODE_INNER (outermode);
1498       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1499       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1500
1501       create_output_operand (&ops[0], target, innermode);
1502       create_input_operand (&ops[1], op0, outermode);
1503       create_integer_operand (&ops[2], pos);
1504       if (maybe_expand_insn (icode, 3, ops))
1505         {
1506           target = ops[0].value;
1507           if (GET_MODE (target) != mode)
1508             return gen_lowpart (tmode, target);
1509           return target;
1510         }
1511     }
1512
1513   /* Make sure we are playing with integral modes.  Pun with subregs
1514      if we aren't.  */
1515   {
1516     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1517     if (imode != GET_MODE (op0))
1518       {
1519         if (MEM_P (op0))
1520           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1521         else if (imode != BLKmode)
1522           {
1523             op0 = gen_lowpart (imode, op0);
1524
1525             /* If we got a SUBREG, force it into a register since we
1526                aren't going to be able to do another SUBREG on it.  */
1527             if (GET_CODE (op0) == SUBREG)
1528               op0 = force_reg (imode, op0);
1529           }
1530         else if (REG_P (op0))
1531           {
1532             rtx reg, subreg;
1533             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1534                                             MODE_INT);
1535             reg = gen_reg_rtx (imode);
1536             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1537             emit_move_insn (subreg, op0);
1538             op0 = reg;
1539             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1540           }
1541         else
1542           {
1543             HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1544             rtx mem = assign_stack_temp (GET_MODE (op0), size);
1545             emit_move_insn (mem, op0);
1546             op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1547           }
1548       }
1549   }
1550
1551   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1552      If that's wrong, the solution is to test for it and set TARGET to 0
1553      if needed.  */
1554
1555   /* Get the mode of the field to use for atomic access or subreg
1556      conversion.  */
1557   mode1 = mode;
1558   if (SCALAR_INT_MODE_P (tmode))
1559     {
1560       machine_mode try_mode = mode_for_size (bitsize,
1561                                                   GET_MODE_CLASS (tmode), 0);
1562       if (try_mode != BLKmode)
1563         mode1 = try_mode;
1564     }
1565   gcc_assert (mode1 != BLKmode);
1566
1567   /* Extraction of a full MODE1 value can be done with a subreg as long
1568      as the least significant bit of the value is the least significant
1569      bit of either OP0 or a word of OP0.  */
1570   if (!MEM_P (op0)
1571       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1572       && bitsize == GET_MODE_BITSIZE (mode1)
1573       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1574     {
1575       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1576                                      bitnum / BITS_PER_UNIT);
1577       if (sub)
1578         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1579     }
1580
1581   /* Extraction of a full MODE1 value can be done with a load as long as
1582      the field is on a byte boundary and is sufficiently aligned.  */
1583   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1584     {
1585       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1586       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1587     }
1588
1589   /* Handle fields bigger than a word.  */
1590
1591   if (bitsize > BITS_PER_WORD)
1592     {
1593       /* Here we transfer the words of the field
1594          in the order least significant first.
1595          This is because the most significant word is the one which may
1596          be less than full.  */
1597
1598       unsigned int backwards = WORDS_BIG_ENDIAN;
1599       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1600       unsigned int i;
1601       rtx_insn *last;
1602
1603       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1604         target = gen_reg_rtx (mode);
1605
1606       /* In case we're about to clobber a base register or something
1607          (see gcc.c-torture/execute/20040625-1.c).   */
1608       if (reg_mentioned_p (target, str_rtx))
1609         target = gen_reg_rtx (mode);
1610
1611       /* Indicate for flow that the entire target reg is being set.  */
1612       emit_clobber (target);
1613
1614       last = get_last_insn ();
1615       for (i = 0; i < nwords; i++)
1616         {
1617           /* If I is 0, use the low-order word in both field and target;
1618              if I is 1, use the next to lowest word; and so on.  */
1619           /* Word number in TARGET to use.  */
1620           unsigned int wordnum
1621             = (backwards
1622                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1623                : i);
1624           /* Offset from start of field in OP0.  */
1625           unsigned int bit_offset = (backwards
1626                                      ? MAX ((int) bitsize - ((int) i + 1)
1627                                             * BITS_PER_WORD,
1628                                             0)
1629                                      : (int) i * BITS_PER_WORD);
1630           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1631           rtx result_part
1632             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1633                                              bitsize - i * BITS_PER_WORD),
1634                                    bitnum + bit_offset, 1, target_part,
1635                                    mode, word_mode, fallback_p);
1636
1637           gcc_assert (target_part);
1638           if (!result_part)
1639             {
1640               delete_insns_since (last);
1641               return NULL;
1642             }
1643
1644           if (result_part != target_part)
1645             emit_move_insn (target_part, result_part);
1646         }
1647
1648       if (unsignedp)
1649         {
1650           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1651              need to be zero'd out.  */
1652           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1653             {
1654               unsigned int i, total_words;
1655
1656               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1657               for (i = nwords; i < total_words; i++)
1658                 emit_move_insn
1659                   (operand_subword (target,
1660                                     backwards ? total_words - i - 1 : i,
1661                                     1, VOIDmode),
1662                    const0_rtx);
1663             }
1664           return target;
1665         }
1666
1667       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1668       target = expand_shift (LSHIFT_EXPR, mode, target,
1669                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1670       return expand_shift (RSHIFT_EXPR, mode, target,
1671                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1672     }
1673
1674   /* If OP0 is a multi-word register, narrow it to the affected word.
1675      If the region spans two words, defer to extract_split_bit_field.  */
1676   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1677     {
1678       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1679                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1680       bitnum %= BITS_PER_WORD;
1681       if (bitnum + bitsize > BITS_PER_WORD)
1682         {
1683           if (!fallback_p)
1684             return NULL_RTX;
1685           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1686           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1687         }
1688     }
1689
1690   /* From here on we know the desired field is smaller than a word.
1691      If OP0 is a register, it too fits within a word.  */
1692   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1693   extraction_insn extv;
1694   if (!MEM_P (op0)
1695       /* ??? We could limit the structure size to the part of OP0 that
1696          contains the field, with appropriate checks for endianness
1697          and TRULY_NOOP_TRUNCATION.  */
1698       && get_best_reg_extraction_insn (&extv, pattern,
1699                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1700                                        tmode))
1701     {
1702       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1703                                                  unsignedp, target, mode,
1704                                                  tmode);
1705       if (result)
1706         return result;
1707     }
1708
1709   /* If OP0 is a memory, try copying it to a register and seeing if a
1710      cheap register alternative is available.  */
1711   if (MEM_P (op0))
1712     {
1713       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1714                                         tmode))
1715         {
1716           rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1717                                                      bitnum, unsignedp,
1718                                                      target, mode,
1719                                                      tmode);
1720           if (result)
1721             return result;
1722         }
1723
1724       rtx_insn *last = get_last_insn ();
1725
1726       /* Try loading part of OP0 into a register and extracting the
1727          bitfield from that.  */
1728       unsigned HOST_WIDE_INT bitpos;
1729       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1730                                                0, 0, tmode, &bitpos);
1731       if (xop0)
1732         {
1733           xop0 = copy_to_reg (xop0);
1734           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1735                                             unsignedp, target,
1736                                             mode, tmode, false);
1737           if (result)
1738             return result;
1739           delete_insns_since (last);
1740         }
1741     }
1742
1743   if (!fallback_p)
1744     return NULL;
1745
1746   /* Find a correspondingly-sized integer field, so we can apply
1747      shifts and masks to it.  */
1748   int_mode = int_mode_for_mode (tmode);
1749   if (int_mode == BLKmode)
1750     int_mode = int_mode_for_mode (mode);
1751   /* Should probably push op0 out to memory and then do a load.  */
1752   gcc_assert (int_mode != BLKmode);
1753
1754   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1755                                     target, unsignedp);
1756   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1757 }
1758
1759 /* Generate code to extract a byte-field from STR_RTX
1760    containing BITSIZE bits, starting at BITNUM,
1761    and put it in TARGET if possible (if TARGET is nonzero).
1762    Regardless of TARGET, we return the rtx for where the value is placed.
1763
1764    STR_RTX is the structure containing the byte (a REG or MEM).
1765    UNSIGNEDP is nonzero if this is an unsigned bit field.
1766    MODE is the natural mode of the field value once extracted.
1767    TMODE is the mode the caller would like the value to have;
1768    but the value may be returned with type MODE instead.
1769
1770    If a TARGET is specified and we can store in it at no extra cost,
1771    we do so, and return TARGET.
1772    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1773    if they are equally easy.  */
1774
1775 rtx
1776 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1777                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1778                    machine_mode mode, machine_mode tmode)
1779 {
1780   machine_mode mode1;
1781
1782   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1783   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1784     mode1 = GET_MODE (str_rtx);
1785   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1786     mode1 = GET_MODE (target);
1787   else
1788     mode1 = tmode;
1789
1790   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1791     {
1792       /* Extraction of a full MODE1 value can be done with a simple load.
1793          We know here that the field can be accessed with one single
1794          instruction.  For targets that support unaligned memory,
1795          an unaligned access may be necessary.  */
1796       if (bitsize == GET_MODE_BITSIZE (mode1))
1797         {
1798           rtx result = adjust_bitfield_address (str_rtx, mode1,
1799                                                 bitnum / BITS_PER_UNIT);
1800           gcc_assert (bitnum % BITS_PER_UNIT == 0);
1801           return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1802         }
1803
1804       str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1805                                       &bitnum);
1806       gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1807       str_rtx = copy_to_reg (str_rtx);
1808     }
1809
1810   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1811                               target, mode, tmode, true);
1812 }
1813 \f
1814 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1815    from bit BITNUM of OP0.
1816
1817    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1818    If TARGET is nonzero, attempts to store the value there
1819    and return TARGET, but this is not guaranteed.
1820    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1821
1822 static rtx
1823 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1824                          unsigned HOST_WIDE_INT bitsize,
1825                          unsigned HOST_WIDE_INT bitnum, rtx target,
1826                          int unsignedp)
1827 {
1828   if (MEM_P (op0))
1829     {
1830       machine_mode mode
1831         = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1832                          MEM_VOLATILE_P (op0));
1833
1834       if (mode == VOIDmode)
1835         /* The only way this should occur is if the field spans word
1836            boundaries.  */
1837         return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1838
1839       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1840     }
1841
1842   return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1843                                     target, unsignedp);
1844 }
1845
1846 /* Helper function for extract_fixed_bit_field, extracts
1847    the bit field always using the MODE of OP0.  */
1848
1849 static rtx
1850 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1851                            unsigned HOST_WIDE_INT bitsize,
1852                            unsigned HOST_WIDE_INT bitnum, rtx target,
1853                            int unsignedp)
1854 {
1855   machine_mode mode = GET_MODE (op0);
1856   gcc_assert (SCALAR_INT_MODE_P (mode));
1857
1858   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1859      for invalid input, such as extract equivalent of f5 from
1860      gcc.dg/pr48335-2.c.  */
1861
1862   if (BYTES_BIG_ENDIAN)
1863     /* BITNUM is the distance between our msb and that of OP0.
1864        Convert it to the distance from the lsb.  */
1865     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1866
1867   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1868      We have reduced the big-endian case to the little-endian case.  */
1869
1870   if (unsignedp)
1871     {
1872       if (bitnum)
1873         {
1874           /* If the field does not already start at the lsb,
1875              shift it so it does.  */
1876           /* Maybe propagate the target for the shift.  */
1877           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1878           if (tmode != mode)
1879             subtarget = 0;
1880           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1881         }
1882       /* Convert the value to the desired mode.  */
1883       if (mode != tmode)
1884         op0 = convert_to_mode (tmode, op0, 1);
1885
1886       /* Unless the msb of the field used to be the msb when we shifted,
1887          mask out the upper bits.  */
1888
1889       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1890         return expand_binop (GET_MODE (op0), and_optab, op0,
1891                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1892                              target, 1, OPTAB_LIB_WIDEN);
1893       return op0;
1894     }
1895
1896   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1897      then arithmetic-shift its lsb to the lsb of the word.  */
1898   op0 = force_reg (mode, op0);
1899
1900   /* Find the narrowest integer mode that contains the field.  */
1901
1902   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1903        mode = GET_MODE_WIDER_MODE (mode))
1904     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1905       {
1906         op0 = convert_to_mode (mode, op0, 0);
1907         break;
1908       }
1909
1910   if (mode != tmode)
1911     target = 0;
1912
1913   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1914     {
1915       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1916       /* Maybe propagate the target for the shift.  */
1917       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1918       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1919     }
1920
1921   return expand_shift (RSHIFT_EXPR, mode, op0,
1922                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1923 }
1924
1925 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1926    VALUE << BITPOS.  */
1927
1928 static rtx
1929 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
1930               int bitpos)
1931 {
1932   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
1933 }
1934 \f
1935 /* Extract a bit field that is split across two words
1936    and return an RTX for the result.
1937
1938    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1939    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1940    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1941
1942 static rtx
1943 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1944                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1945 {
1946   unsigned int unit;
1947   unsigned int bitsdone = 0;
1948   rtx result = NULL_RTX;
1949   int first = 1;
1950
1951   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1952      much at a time.  */
1953   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1954     unit = BITS_PER_WORD;
1955   else
1956     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1957
1958   while (bitsdone < bitsize)
1959     {
1960       unsigned HOST_WIDE_INT thissize;
1961       rtx part, word;
1962       unsigned HOST_WIDE_INT thispos;
1963       unsigned HOST_WIDE_INT offset;
1964
1965       offset = (bitpos + bitsdone) / unit;
1966       thispos = (bitpos + bitsdone) % unit;
1967
1968       /* THISSIZE must not overrun a word boundary.  Otherwise,
1969          extract_fixed_bit_field will call us again, and we will mutually
1970          recurse forever.  */
1971       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1972       thissize = MIN (thissize, unit - thispos);
1973
1974       /* If OP0 is a register, then handle OFFSET here.
1975
1976          When handling multiword bitfields, extract_bit_field may pass
1977          down a word_mode SUBREG of a larger REG for a bitfield that actually
1978          crosses a word boundary.  Thus, for a SUBREG, we must find
1979          the current word starting from the base register.  */
1980       if (GET_CODE (op0) == SUBREG)
1981         {
1982           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1983           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1984                                         GET_MODE (SUBREG_REG (op0)));
1985           offset = 0;
1986         }
1987       else if (REG_P (op0))
1988         {
1989           word = operand_subword_force (op0, offset, GET_MODE (op0));
1990           offset = 0;
1991         }
1992       else
1993         word = op0;
1994
1995       /* Extract the parts in bit-counting order,
1996          whose meaning is determined by BYTES_PER_UNIT.
1997          OFFSET is in UNITs, and UNIT is in bits.  */
1998       part = extract_fixed_bit_field (word_mode, word, thissize,
1999                                       offset * unit + thispos, 0, 1);
2000       bitsdone += thissize;
2001
2002       /* Shift this part into place for the result.  */
2003       if (BYTES_BIG_ENDIAN)
2004         {
2005           if (bitsize != bitsdone)
2006             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2007                                  bitsize - bitsdone, 0, 1);
2008         }
2009       else
2010         {
2011           if (bitsdone != thissize)
2012             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2013                                  bitsdone - thissize, 0, 1);
2014         }
2015
2016       if (first)
2017         result = part;
2018       else
2019         /* Combine the parts with bitwise or.  This works
2020            because we extracted each part as an unsigned bit field.  */
2021         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2022                                OPTAB_LIB_WIDEN);
2023
2024       first = 0;
2025     }
2026
2027   /* Unsigned bit field: we are done.  */
2028   if (unsignedp)
2029     return result;
2030   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2031   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2032                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2033   return expand_shift (RSHIFT_EXPR, word_mode, result,
2034                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2035 }
2036 \f
2037 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2038    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2039    MODE, fill the upper bits with zeros.  Fail if the layout of either
2040    mode is unknown (as for CC modes) or if the extraction would involve
2041    unprofitable mode punning.  Return the value on success, otherwise
2042    return null.
2043
2044    This is different from gen_lowpart* in these respects:
2045
2046      - the returned value must always be considered an rvalue
2047
2048      - when MODE is wider than SRC_MODE, the extraction involves
2049        a zero extension
2050
2051      - when MODE is smaller than SRC_MODE, the extraction involves
2052        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2053
2054    In other words, this routine performs a computation, whereas the
2055    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2056    operations.  */
2057
2058 rtx
2059 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2060 {
2061   machine_mode int_mode, src_int_mode;
2062
2063   if (mode == src_mode)
2064     return src;
2065
2066   if (CONSTANT_P (src))
2067     {
2068       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2069          fails, it will happily create (subreg (symbol_ref)) or similar
2070          invalid SUBREGs.  */
2071       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2072       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2073       if (ret)
2074         return ret;
2075
2076       if (GET_MODE (src) == VOIDmode
2077           || !validate_subreg (mode, src_mode, src, byte))
2078         return NULL_RTX;
2079
2080       src = force_reg (GET_MODE (src), src);
2081       return gen_rtx_SUBREG (mode, src, byte);
2082     }
2083
2084   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2085     return NULL_RTX;
2086
2087   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2088       && MODES_TIEABLE_P (mode, src_mode))
2089     {
2090       rtx x = gen_lowpart_common (mode, src);
2091       if (x)
2092         return x;
2093     }
2094
2095   src_int_mode = int_mode_for_mode (src_mode);
2096   int_mode = int_mode_for_mode (mode);
2097   if (src_int_mode == BLKmode || int_mode == BLKmode)
2098     return NULL_RTX;
2099
2100   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2101     return NULL_RTX;
2102   if (!MODES_TIEABLE_P (int_mode, mode))
2103     return NULL_RTX;
2104
2105   src = gen_lowpart (src_int_mode, src);
2106   src = convert_modes (int_mode, src_int_mode, src, true);
2107   src = gen_lowpart (mode, src);
2108   return src;
2109 }
2110 \f
2111 /* Add INC into TARGET.  */
2112
2113 void
2114 expand_inc (rtx target, rtx inc)
2115 {
2116   rtx value = expand_binop (GET_MODE (target), add_optab,
2117                             target, inc,
2118                             target, 0, OPTAB_LIB_WIDEN);
2119   if (value != target)
2120     emit_move_insn (target, value);
2121 }
2122
2123 /* Subtract DEC from TARGET.  */
2124
2125 void
2126 expand_dec (rtx target, rtx dec)
2127 {
2128   rtx value = expand_binop (GET_MODE (target), sub_optab,
2129                             target, dec,
2130                             target, 0, OPTAB_LIB_WIDEN);
2131   if (value != target)
2132     emit_move_insn (target, value);
2133 }
2134 \f
2135 /* Output a shift instruction for expression code CODE,
2136    with SHIFTED being the rtx for the value to shift,
2137    and AMOUNT the rtx for the amount to shift by.
2138    Store the result in the rtx TARGET, if that is convenient.
2139    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2140    Return the rtx for where the value is.  */
2141
2142 static rtx
2143 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2144                 rtx amount, rtx target, int unsignedp)
2145 {
2146   rtx op1, temp = 0;
2147   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2148   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2149   optab lshift_optab = ashl_optab;
2150   optab rshift_arith_optab = ashr_optab;
2151   optab rshift_uns_optab = lshr_optab;
2152   optab lrotate_optab = rotl_optab;
2153   optab rrotate_optab = rotr_optab;
2154   machine_mode op1_mode;
2155   machine_mode scalar_mode = mode;
2156   int attempt;
2157   bool speed = optimize_insn_for_speed_p ();
2158
2159   if (VECTOR_MODE_P (mode))
2160     scalar_mode = GET_MODE_INNER (mode);
2161   op1 = amount;
2162   op1_mode = GET_MODE (op1);
2163
2164   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2165      shift amount is a vector, use the vector/vector shift patterns.  */
2166   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2167     {
2168       lshift_optab = vashl_optab;
2169       rshift_arith_optab = vashr_optab;
2170       rshift_uns_optab = vlshr_optab;
2171       lrotate_optab = vrotl_optab;
2172       rrotate_optab = vrotr_optab;
2173     }
2174
2175   /* Previously detected shift-counts computed by NEGATE_EXPR
2176      and shifted in the other direction; but that does not work
2177      on all machines.  */
2178
2179   if (SHIFT_COUNT_TRUNCATED)
2180     {
2181       if (CONST_INT_P (op1)
2182           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2183               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2184         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2185                        % GET_MODE_BITSIZE (scalar_mode));
2186       else if (GET_CODE (op1) == SUBREG
2187                && subreg_lowpart_p (op1)
2188                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2189                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2190         op1 = SUBREG_REG (op1);
2191     }
2192
2193   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2194      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2195      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2196      amount instead.  */
2197   if (rotate
2198       && CONST_INT_P (op1)
2199       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2200                    GET_MODE_BITSIZE (scalar_mode) - 1))
2201     {
2202       op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2203       left = !left;
2204       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2205     }
2206
2207   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2208      Note that this is not the case for bigger values.  For instance a rotation
2209      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2210      0x04030201 (bswapsi).  */
2211   if (rotate
2212       && CONST_INT_P (op1)
2213       && INTVAL (op1) == BITS_PER_UNIT
2214       && GET_MODE_SIZE (scalar_mode) == 2
2215       && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2216     return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2217                                   unsignedp);
2218
2219   if (op1 == const0_rtx)
2220     return shifted;
2221
2222   /* Check whether its cheaper to implement a left shift by a constant
2223      bit count by a sequence of additions.  */
2224   if (code == LSHIFT_EXPR
2225       && CONST_INT_P (op1)
2226       && INTVAL (op1) > 0
2227       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2228       && INTVAL (op1) < MAX_BITS_PER_WORD
2229       && (shift_cost (speed, mode, INTVAL (op1))
2230           > INTVAL (op1) * add_cost (speed, mode))
2231       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2232     {
2233       int i;
2234       for (i = 0; i < INTVAL (op1); i++)
2235         {
2236           temp = force_reg (mode, shifted);
2237           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2238                                   unsignedp, OPTAB_LIB_WIDEN);
2239         }
2240       return shifted;
2241     }
2242
2243   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2244     {
2245       enum optab_methods methods;
2246
2247       if (attempt == 0)
2248         methods = OPTAB_DIRECT;
2249       else if (attempt == 1)
2250         methods = OPTAB_WIDEN;
2251       else
2252         methods = OPTAB_LIB_WIDEN;
2253
2254       if (rotate)
2255         {
2256           /* Widening does not work for rotation.  */
2257           if (methods == OPTAB_WIDEN)
2258             continue;
2259           else if (methods == OPTAB_LIB_WIDEN)
2260             {
2261               /* If we have been unable to open-code this by a rotation,
2262                  do it as the IOR of two shifts.  I.e., to rotate A
2263                  by N bits, compute
2264                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2265                  where C is the bitsize of A.
2266
2267                  It is theoretically possible that the target machine might
2268                  not be able to perform either shift and hence we would
2269                  be making two libcalls rather than just the one for the
2270                  shift (similarly if IOR could not be done).  We will allow
2271                  this extremely unlikely lossage to avoid complicating the
2272                  code below.  */
2273
2274               rtx subtarget = target == shifted ? 0 : target;
2275               rtx new_amount, other_amount;
2276               rtx temp1;
2277
2278               new_amount = op1;
2279               if (op1 == const0_rtx)
2280                 return shifted;
2281               else if (CONST_INT_P (op1))
2282                 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2283                                         - INTVAL (op1));
2284               else
2285                 {
2286                   other_amount
2287                     = simplify_gen_unary (NEG, GET_MODE (op1),
2288                                           op1, GET_MODE (op1));
2289                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2290                   other_amount
2291                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2292                                            gen_int_mode (mask, GET_MODE (op1)));
2293                 }
2294
2295               shifted = force_reg (mode, shifted);
2296
2297               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2298                                      mode, shifted, new_amount, 0, 1);
2299               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2300                                       mode, shifted, other_amount,
2301                                       subtarget, 1);
2302               return expand_binop (mode, ior_optab, temp, temp1, target,
2303                                    unsignedp, methods);
2304             }
2305
2306           temp = expand_binop (mode,
2307                                left ? lrotate_optab : rrotate_optab,
2308                                shifted, op1, target, unsignedp, methods);
2309         }
2310       else if (unsignedp)
2311         temp = expand_binop (mode,
2312                              left ? lshift_optab : rshift_uns_optab,
2313                              shifted, op1, target, unsignedp, methods);
2314
2315       /* Do arithmetic shifts.
2316          Also, if we are going to widen the operand, we can just as well
2317          use an arithmetic right-shift instead of a logical one.  */
2318       if (temp == 0 && ! rotate
2319           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2320         {
2321           enum optab_methods methods1 = methods;
2322
2323           /* If trying to widen a log shift to an arithmetic shift,
2324              don't accept an arithmetic shift of the same size.  */
2325           if (unsignedp)
2326             methods1 = OPTAB_MUST_WIDEN;
2327
2328           /* Arithmetic shift */
2329
2330           temp = expand_binop (mode,
2331                                left ? lshift_optab : rshift_arith_optab,
2332                                shifted, op1, target, unsignedp, methods1);
2333         }
2334
2335       /* We used to try extzv here for logical right shifts, but that was
2336          only useful for one machine, the VAX, and caused poor code
2337          generation there for lshrdi3, so the code was deleted and a
2338          define_expand for lshrsi3 was added to vax.md.  */
2339     }
2340
2341   gcc_assert (temp);
2342   return temp;
2343 }
2344
2345 /* Output a shift instruction for expression code CODE,
2346    with SHIFTED being the rtx for the value to shift,
2347    and AMOUNT the amount to shift by.
2348    Store the result in the rtx TARGET, if that is convenient.
2349    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2350    Return the rtx for where the value is.  */
2351
2352 rtx
2353 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2354               int amount, rtx target, int unsignedp)
2355 {
2356   return expand_shift_1 (code, mode,
2357                          shifted, GEN_INT (amount), target, unsignedp);
2358 }
2359
2360 /* Output a shift instruction for expression code CODE,
2361    with SHIFTED being the rtx for the value to shift,
2362    and AMOUNT the tree for the amount to shift by.
2363    Store the result in the rtx TARGET, if that is convenient.
2364    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2365    Return the rtx for where the value is.  */
2366
2367 rtx
2368 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2369                        tree amount, rtx target, int unsignedp)
2370 {
2371   return expand_shift_1 (code, mode,
2372                          shifted, expand_normal (amount), target, unsignedp);
2373 }
2374
2375 \f
2376 /* Indicates the type of fixup needed after a constant multiplication.
2377    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2378    the result should be negated, and ADD_VARIANT means that the
2379    multiplicand should be added to the result.  */
2380 enum mult_variant {basic_variant, negate_variant, add_variant};
2381
2382 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2383                         const struct mult_cost *, machine_mode mode);
2384 static bool choose_mult_variant (machine_mode, HOST_WIDE_INT,
2385                                  struct algorithm *, enum mult_variant *, int);
2386 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2387                               const struct algorithm *, enum mult_variant);
2388 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2389 static rtx extract_high_half (machine_mode, rtx);
2390 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2391 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2392                                        int, int);
2393 /* Compute and return the best algorithm for multiplying by T.
2394    The algorithm must cost less than cost_limit
2395    If retval.cost >= COST_LIMIT, no algorithm was found and all
2396    other field of the returned struct are undefined.
2397    MODE is the machine mode of the multiplication.  */
2398
2399 static void
2400 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2401             const struct mult_cost *cost_limit, machine_mode mode)
2402 {
2403   int m;
2404   struct algorithm *alg_in, *best_alg;
2405   struct mult_cost best_cost;
2406   struct mult_cost new_limit;
2407   int op_cost, op_latency;
2408   unsigned HOST_WIDE_INT orig_t = t;
2409   unsigned HOST_WIDE_INT q;
2410   int maxm, hash_index;
2411   bool cache_hit = false;
2412   enum alg_code cache_alg = alg_zero;
2413   bool speed = optimize_insn_for_speed_p ();
2414   machine_mode imode;
2415   struct alg_hash_entry *entry_ptr;
2416
2417   /* Indicate that no algorithm is yet found.  If no algorithm
2418      is found, this value will be returned and indicate failure.  */
2419   alg_out->cost.cost = cost_limit->cost + 1;
2420   alg_out->cost.latency = cost_limit->latency + 1;
2421
2422   if (cost_limit->cost < 0
2423       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2424     return;
2425
2426   /* Be prepared for vector modes.  */
2427   imode = GET_MODE_INNER (mode);
2428
2429   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2430
2431   /* Restrict the bits of "t" to the multiplication's mode.  */
2432   t &= GET_MODE_MASK (imode);
2433
2434   /* t == 1 can be done in zero cost.  */
2435   if (t == 1)
2436     {
2437       alg_out->ops = 1;
2438       alg_out->cost.cost = 0;
2439       alg_out->cost.latency = 0;
2440       alg_out->op[0] = alg_m;
2441       return;
2442     }
2443
2444   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2445      fail now.  */
2446   if (t == 0)
2447     {
2448       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2449         return;
2450       else
2451         {
2452           alg_out->ops = 1;
2453           alg_out->cost.cost = zero_cost (speed);
2454           alg_out->cost.latency = zero_cost (speed);
2455           alg_out->op[0] = alg_zero;
2456           return;
2457         }
2458     }
2459
2460   /* We'll be needing a couple extra algorithm structures now.  */
2461
2462   alg_in = XALLOCA (struct algorithm);
2463   best_alg = XALLOCA (struct algorithm);
2464   best_cost = *cost_limit;
2465
2466   /* Compute the hash index.  */
2467   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2468
2469   /* See if we already know what to do for T.  */
2470   entry_ptr = alg_hash_entry_ptr (hash_index);
2471   if (entry_ptr->t == t
2472       && entry_ptr->mode == mode
2473       && entry_ptr->mode == mode
2474       && entry_ptr->speed == speed
2475       && entry_ptr->alg != alg_unknown)
2476     {
2477       cache_alg = entry_ptr->alg;
2478
2479       if (cache_alg == alg_impossible)
2480         {
2481           /* The cache tells us that it's impossible to synthesize
2482              multiplication by T within entry_ptr->cost.  */
2483           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2484             /* COST_LIMIT is at least as restrictive as the one
2485                recorded in the hash table, in which case we have no
2486                hope of synthesizing a multiplication.  Just
2487                return.  */
2488             return;
2489
2490           /* If we get here, COST_LIMIT is less restrictive than the
2491              one recorded in the hash table, so we may be able to
2492              synthesize a multiplication.  Proceed as if we didn't
2493              have the cache entry.  */
2494         }
2495       else
2496         {
2497           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2498             /* The cached algorithm shows that this multiplication
2499                requires more cost than COST_LIMIT.  Just return.  This
2500                way, we don't clobber this cache entry with
2501                alg_impossible but retain useful information.  */
2502             return;
2503
2504           cache_hit = true;
2505
2506           switch (cache_alg)
2507             {
2508             case alg_shift:
2509               goto do_alg_shift;
2510
2511             case alg_add_t_m2:
2512             case alg_sub_t_m2:
2513               goto do_alg_addsub_t_m2;
2514
2515             case alg_add_factor:
2516             case alg_sub_factor:
2517               goto do_alg_addsub_factor;
2518
2519             case alg_add_t2_m:
2520               goto do_alg_add_t2_m;
2521
2522             case alg_sub_t2_m:
2523               goto do_alg_sub_t2_m;
2524
2525             default:
2526               gcc_unreachable ();
2527             }
2528         }
2529     }
2530
2531   /* If we have a group of zero bits at the low-order part of T, try
2532      multiplying by the remaining bits and then doing a shift.  */
2533
2534   if ((t & 1) == 0)
2535     {
2536     do_alg_shift:
2537       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2538       if (m < maxm)
2539         {
2540           q = t >> m;
2541           /* The function expand_shift will choose between a shift and
2542              a sequence of additions, so the observed cost is given as
2543              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2544           op_cost = m * add_cost (speed, mode);
2545           if (shift_cost (speed, mode, m) < op_cost)
2546             op_cost = shift_cost (speed, mode, m);
2547           new_limit.cost = best_cost.cost - op_cost;
2548           new_limit.latency = best_cost.latency - op_cost;
2549           synth_mult (alg_in, q, &new_limit, mode);
2550
2551           alg_in->cost.cost += op_cost;
2552           alg_in->cost.latency += op_cost;
2553           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2554             {
2555               best_cost = alg_in->cost;
2556               std::swap (alg_in, best_alg);
2557               best_alg->log[best_alg->ops] = m;
2558               best_alg->op[best_alg->ops] = alg_shift;
2559             }
2560
2561           /* See if treating ORIG_T as a signed number yields a better
2562              sequence.  Try this sequence only for a negative ORIG_T
2563              as it would be useless for a non-negative ORIG_T.  */
2564           if ((HOST_WIDE_INT) orig_t < 0)
2565             {
2566               /* Shift ORIG_T as follows because a right shift of a
2567                  negative-valued signed type is implementation
2568                  defined.  */
2569               q = ~(~orig_t >> m);
2570               /* The function expand_shift will choose between a shift
2571                  and a sequence of additions, so the observed cost is
2572                  given as MIN (m * add_cost(speed, mode),
2573                  shift_cost(speed, mode, m)).  */
2574               op_cost = m * add_cost (speed, mode);
2575               if (shift_cost (speed, mode, m) < op_cost)
2576                 op_cost = shift_cost (speed, mode, m);
2577               new_limit.cost = best_cost.cost - op_cost;
2578               new_limit.latency = best_cost.latency - op_cost;
2579               synth_mult (alg_in, q, &new_limit, mode);
2580
2581               alg_in->cost.cost += op_cost;
2582               alg_in->cost.latency += op_cost;
2583               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2584                 {
2585                   best_cost = alg_in->cost;
2586                   std::swap (alg_in, best_alg);
2587                   best_alg->log[best_alg->ops] = m;
2588                   best_alg->op[best_alg->ops] = alg_shift;
2589                 }
2590             }
2591         }
2592       if (cache_hit)
2593         goto done;
2594     }
2595
2596   /* If we have an odd number, add or subtract one.  */
2597   if ((t & 1) != 0)
2598     {
2599       unsigned HOST_WIDE_INT w;
2600
2601     do_alg_addsub_t_m2:
2602       for (w = 1; (w & t) != 0; w <<= 1)
2603         ;
2604       /* If T was -1, then W will be zero after the loop.  This is another
2605          case where T ends with ...111.  Handling this with (T + 1) and
2606          subtract 1 produces slightly better code and results in algorithm
2607          selection much faster than treating it like the ...0111 case
2608          below.  */
2609       if (w == 0
2610           || (w > 2
2611               /* Reject the case where t is 3.
2612                  Thus we prefer addition in that case.  */
2613               && t != 3))
2614         {
2615           /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */
2616
2617           op_cost = add_cost (speed, mode);
2618           new_limit.cost = best_cost.cost - op_cost;
2619           new_limit.latency = best_cost.latency - op_cost;
2620           synth_mult (alg_in, t + 1, &new_limit, mode);
2621
2622           alg_in->cost.cost += op_cost;
2623           alg_in->cost.latency += op_cost;
2624           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2625             {
2626               best_cost = alg_in->cost;
2627               std::swap (alg_in, best_alg);
2628               best_alg->log[best_alg->ops] = 0;
2629               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2630             }
2631         }
2632       else
2633         {
2634           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */
2635
2636           op_cost = add_cost (speed, mode);
2637           new_limit.cost = best_cost.cost - op_cost;
2638           new_limit.latency = best_cost.latency - op_cost;
2639           synth_mult (alg_in, t - 1, &new_limit, mode);
2640
2641           alg_in->cost.cost += op_cost;
2642           alg_in->cost.latency += op_cost;
2643           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2644             {
2645               best_cost = alg_in->cost;
2646               std::swap (alg_in, best_alg);
2647               best_alg->log[best_alg->ops] = 0;
2648               best_alg->op[best_alg->ops] = alg_add_t_m2;
2649             }
2650         }
2651
2652       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2653          quickly with a - a * n for some appropriate constant n.  */
2654       m = exact_log2 (-orig_t + 1);
2655       if (m >= 0 && m < maxm)
2656         {
2657           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2658           /* If the target has a cheap shift-and-subtract insn use
2659              that in preference to a shift insn followed by a sub insn.
2660              Assume that the shift-and-sub is "atomic" with a latency
2661              equal to it's cost, otherwise assume that on superscalar
2662              hardware the shift may be executed concurrently with the
2663              earlier steps in the algorithm.  */
2664           if (shiftsub1_cost (speed, mode, m) <= op_cost)
2665             {
2666               op_cost = shiftsub1_cost (speed, mode, m);
2667               op_latency = op_cost;
2668             }
2669           else
2670             op_latency = add_cost (speed, mode);
2671
2672           new_limit.cost = best_cost.cost - op_cost;
2673           new_limit.latency = best_cost.latency - op_latency;
2674           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2675                       &new_limit, mode);
2676
2677           alg_in->cost.cost += op_cost;
2678           alg_in->cost.latency += op_latency;
2679           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2680             {
2681               best_cost = alg_in->cost;
2682               std::swap (alg_in, best_alg);
2683               best_alg->log[best_alg->ops] = m;
2684               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2685             }
2686         }
2687
2688       if (cache_hit)
2689         goto done;
2690     }
2691
2692   /* Look for factors of t of the form
2693      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2694      If we find such a factor, we can multiply by t using an algorithm that
2695      multiplies by q, shift the result by m and add/subtract it to itself.
2696
2697      We search for large factors first and loop down, even if large factors
2698      are less probable than small; if we find a large factor we will find a
2699      good sequence quickly, and therefore be able to prune (by decreasing
2700      COST_LIMIT) the search.  */
2701
2702  do_alg_addsub_factor:
2703   for (m = floor_log2 (t - 1); m >= 2; m--)
2704     {
2705       unsigned HOST_WIDE_INT d;
2706
2707       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2708       if (t % d == 0 && t > d && m < maxm
2709           && (!cache_hit || cache_alg == alg_add_factor))
2710         {
2711           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2712           if (shiftadd_cost (speed, mode, m) <= op_cost)
2713             op_cost = shiftadd_cost (speed, mode, m);
2714
2715           op_latency = op_cost;
2716
2717
2718           new_limit.cost = best_cost.cost - op_cost;
2719           new_limit.latency = best_cost.latency - op_latency;
2720           synth_mult (alg_in, t / d, &new_limit, mode);
2721
2722           alg_in->cost.cost += op_cost;
2723           alg_in->cost.latency += op_latency;
2724           if (alg_in->cost.latency < op_cost)
2725             alg_in->cost.latency = op_cost;
2726           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2727             {
2728               best_cost = alg_in->cost;
2729               std::swap (alg_in, best_alg);
2730               best_alg->log[best_alg->ops] = m;
2731               best_alg->op[best_alg->ops] = alg_add_factor;
2732             }
2733           /* Other factors will have been taken care of in the recursion.  */
2734           break;
2735         }
2736
2737       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2738       if (t % d == 0 && t > d && m < maxm
2739           && (!cache_hit || cache_alg == alg_sub_factor))
2740         {
2741           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2742           if (shiftsub0_cost (speed, mode, m) <= op_cost)
2743             op_cost = shiftsub0_cost (speed, mode, m);
2744
2745           op_latency = op_cost;
2746
2747           new_limit.cost = best_cost.cost - op_cost;
2748           new_limit.latency = best_cost.latency - op_latency;
2749           synth_mult (alg_in, t / d, &new_limit, mode);
2750
2751           alg_in->cost.cost += op_cost;
2752           alg_in->cost.latency += op_latency;
2753           if (alg_in->cost.latency < op_cost)
2754             alg_in->cost.latency = op_cost;
2755           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2756             {
2757               best_cost = alg_in->cost;
2758               std::swap (alg_in, best_alg);
2759               best_alg->log[best_alg->ops] = m;
2760               best_alg->op[best_alg->ops] = alg_sub_factor;
2761             }
2762           break;
2763         }
2764     }
2765   if (cache_hit)
2766     goto done;
2767
2768   /* Try shift-and-add (load effective address) instructions,
2769      i.e. do a*3, a*5, a*9.  */
2770   if ((t & 1) != 0)
2771     {
2772     do_alg_add_t2_m:
2773       q = t - 1;
2774       q = q & -q;
2775       m = exact_log2 (q);
2776       if (m >= 0 && m < maxm)
2777         {
2778           op_cost = shiftadd_cost (speed, mode, m);
2779           new_limit.cost = best_cost.cost - op_cost;
2780           new_limit.latency = best_cost.latency - op_cost;
2781           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2782
2783           alg_in->cost.cost += op_cost;
2784           alg_in->cost.latency += op_cost;
2785           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2786             {
2787               best_cost = alg_in->cost;
2788               std::swap (alg_in, best_alg);
2789               best_alg->log[best_alg->ops] = m;
2790               best_alg->op[best_alg->ops] = alg_add_t2_m;
2791             }
2792         }
2793       if (cache_hit)
2794         goto done;
2795
2796     do_alg_sub_t2_m:
2797       q = t + 1;
2798       q = q & -q;
2799       m = exact_log2 (q);
2800       if (m >= 0 && m < maxm)
2801         {
2802           op_cost = shiftsub0_cost (speed, mode, m);
2803           new_limit.cost = best_cost.cost - op_cost;
2804           new_limit.latency = best_cost.latency - op_cost;
2805           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2806
2807           alg_in->cost.cost += op_cost;
2808           alg_in->cost.latency += op_cost;
2809           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2810             {
2811               best_cost = alg_in->cost;
2812               std::swap (alg_in, best_alg);
2813               best_alg->log[best_alg->ops] = m;
2814               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2815             }
2816         }
2817       if (cache_hit)
2818         goto done;
2819     }
2820
2821  done:
2822   /* If best_cost has not decreased, we have not found any algorithm.  */
2823   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2824     {
2825       /* We failed to find an algorithm.  Record alg_impossible for
2826          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2827          we are asked to find an algorithm for T within the same or
2828          lower COST_LIMIT, we can immediately return to the
2829          caller.  */
2830       entry_ptr->t = t;
2831       entry_ptr->mode = mode;
2832       entry_ptr->speed = speed;
2833       entry_ptr->alg = alg_impossible;
2834       entry_ptr->cost = *cost_limit;
2835       return;
2836     }
2837
2838   /* Cache the result.  */
2839   if (!cache_hit)
2840     {
2841       entry_ptr->t = t;
2842       entry_ptr->mode = mode;
2843       entry_ptr->speed = speed;
2844       entry_ptr->alg = best_alg->op[best_alg->ops];
2845       entry_ptr->cost.cost = best_cost.cost;
2846       entry_ptr->cost.latency = best_cost.latency;
2847     }
2848
2849   /* If we are getting a too long sequence for `struct algorithm'
2850      to record, make this search fail.  */
2851   if (best_alg->ops == MAX_BITS_PER_WORD)
2852     return;
2853
2854   /* Copy the algorithm from temporary space to the space at alg_out.
2855      We avoid using structure assignment because the majority of
2856      best_alg is normally undefined, and this is a critical function.  */
2857   alg_out->ops = best_alg->ops + 1;
2858   alg_out->cost = best_cost;
2859   memcpy (alg_out->op, best_alg->op,
2860           alg_out->ops * sizeof *alg_out->op);
2861   memcpy (alg_out->log, best_alg->log,
2862           alg_out->ops * sizeof *alg_out->log);
2863 }
2864 \f
2865 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2866    Try three variations:
2867
2868        - a shift/add sequence based on VAL itself
2869        - a shift/add sequence based on -VAL, followed by a negation
2870        - a shift/add sequence based on VAL - 1, followed by an addition.
2871
2872    Return true if the cheapest of these cost less than MULT_COST,
2873    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2874
2875 static bool
2876 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2877                      struct algorithm *alg, enum mult_variant *variant,
2878                      int mult_cost)
2879 {
2880   struct algorithm alg2;
2881   struct mult_cost limit;
2882   int op_cost;
2883   bool speed = optimize_insn_for_speed_p ();
2884
2885   /* Fail quickly for impossible bounds.  */
2886   if (mult_cost < 0)
2887     return false;
2888
2889   /* Ensure that mult_cost provides a reasonable upper bound.
2890      Any constant multiplication can be performed with less
2891      than 2 * bits additions.  */
2892   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2893   if (mult_cost > op_cost)
2894     mult_cost = op_cost;
2895
2896   *variant = basic_variant;
2897   limit.cost = mult_cost;
2898   limit.latency = mult_cost;
2899   synth_mult (alg, val, &limit, mode);
2900
2901   /* This works only if the inverted value actually fits in an
2902      `unsigned int' */
2903   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2904     {
2905       op_cost = neg_cost (speed, mode);
2906       if (MULT_COST_LESS (&alg->cost, mult_cost))
2907         {
2908           limit.cost = alg->cost.cost - op_cost;
2909           limit.latency = alg->cost.latency - op_cost;
2910         }
2911       else
2912         {
2913           limit.cost = mult_cost - op_cost;
2914           limit.latency = mult_cost - op_cost;
2915         }
2916
2917       synth_mult (&alg2, -val, &limit, mode);
2918       alg2.cost.cost += op_cost;
2919       alg2.cost.latency += op_cost;
2920       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2921         *alg = alg2, *variant = negate_variant;
2922     }
2923
2924   /* This proves very useful for division-by-constant.  */
2925   op_cost = add_cost (speed, mode);
2926   if (MULT_COST_LESS (&alg->cost, mult_cost))
2927     {
2928       limit.cost = alg->cost.cost - op_cost;
2929       limit.latency = alg->cost.latency - op_cost;
2930     }
2931   else
2932     {
2933       limit.cost = mult_cost - op_cost;
2934       limit.latency = mult_cost - op_cost;
2935     }
2936
2937   synth_mult (&alg2, val - 1, &limit, mode);
2938   alg2.cost.cost += op_cost;
2939   alg2.cost.latency += op_cost;
2940   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2941     *alg = alg2, *variant = add_variant;
2942
2943   return MULT_COST_LESS (&alg->cost, mult_cost);
2944 }
2945
2946 /* A subroutine of expand_mult, used for constant multiplications.
2947    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2948    convenient.  Use the shift/add sequence described by ALG and apply
2949    the final fixup specified by VARIANT.  */
2950
2951 static rtx
2952 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
2953                    rtx target, const struct algorithm *alg,
2954                    enum mult_variant variant)
2955 {
2956   HOST_WIDE_INT val_so_far;
2957   rtx_insn *insn;
2958   rtx accum, tem;
2959   int opno;
2960   machine_mode nmode;
2961
2962   /* Avoid referencing memory over and over and invalid sharing
2963      on SUBREGs.  */
2964   op0 = force_reg (mode, op0);
2965
2966   /* ACCUM starts out either as OP0 or as a zero, depending on
2967      the first operation.  */
2968
2969   if (alg->op[0] == alg_zero)
2970     {
2971       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2972       val_so_far = 0;
2973     }
2974   else if (alg->op[0] == alg_m)
2975     {
2976       accum = copy_to_mode_reg (mode, op0);
2977       val_so_far = 1;
2978     }
2979   else
2980     gcc_unreachable ();
2981
2982   for (opno = 1; opno < alg->ops; opno++)
2983     {
2984       int log = alg->log[opno];
2985       rtx shift_subtarget = optimize ? 0 : accum;
2986       rtx add_target
2987         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2988            && !optimize)
2989           ? target : 0;
2990       rtx accum_target = optimize ? 0 : accum;
2991       rtx accum_inner;
2992
2993       switch (alg->op[opno])
2994         {
2995         case alg_shift:
2996           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2997           /* REG_EQUAL note will be attached to the following insn.  */
2998           emit_move_insn (accum, tem);
2999           val_so_far <<= log;
3000           break;
3001
3002         case alg_add_t_m2:
3003           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3004           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3005                                  add_target ? add_target : accum_target);
3006           val_so_far += (HOST_WIDE_INT) 1 << log;
3007           break;
3008
3009         case alg_sub_t_m2:
3010           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3011           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3012                                  add_target ? add_target : accum_target);
3013           val_so_far -= (HOST_WIDE_INT) 1 << log;
3014           break;
3015
3016         case alg_add_t2_m:
3017           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3018                                 log, shift_subtarget, 0);
3019           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3020                                  add_target ? add_target : accum_target);
3021           val_so_far = (val_so_far << log) + 1;
3022           break;
3023
3024         case alg_sub_t2_m:
3025           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3026                                 log, shift_subtarget, 0);
3027           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3028                                  add_target ? add_target : accum_target);
3029           val_so_far = (val_so_far << log) - 1;
3030           break;
3031
3032         case alg_add_factor:
3033           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3034           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3035                                  add_target ? add_target : accum_target);
3036           val_so_far += val_so_far << log;
3037           break;
3038
3039         case alg_sub_factor:
3040           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3041           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3042                                  (add_target
3043                                   ? add_target : (optimize ? 0 : tem)));
3044           val_so_far = (val_so_far << log) - val_so_far;
3045           break;
3046
3047         default:
3048           gcc_unreachable ();
3049         }
3050
3051       if (SCALAR_INT_MODE_P (mode))
3052         {
3053           /* Write a REG_EQUAL note on the last insn so that we can cse
3054              multiplication sequences.  Note that if ACCUM is a SUBREG,
3055              we've set the inner register and must properly indicate that.  */
3056           tem = op0, nmode = mode;
3057           accum_inner = accum;
3058           if (GET_CODE (accum) == SUBREG)
3059             {
3060               accum_inner = SUBREG_REG (accum);
3061               nmode = GET_MODE (accum_inner);
3062               tem = gen_lowpart (nmode, op0);
3063             }
3064
3065           insn = get_last_insn ();
3066           set_dst_reg_note (insn, REG_EQUAL,
3067                             gen_rtx_MULT (nmode, tem,
3068                                           gen_int_mode (val_so_far, nmode)),
3069                             accum_inner);
3070         }
3071     }
3072
3073   if (variant == negate_variant)
3074     {
3075       val_so_far = -val_so_far;
3076       accum = expand_unop (mode, neg_optab, accum, target, 0);
3077     }
3078   else if (variant == add_variant)
3079     {
3080       val_so_far = val_so_far + 1;
3081       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3082     }
3083
3084   /* Compare only the bits of val and val_so_far that are significant
3085      in the result mode, to avoid sign-/zero-extension confusion.  */
3086   nmode = GET_MODE_INNER (mode);
3087   val &= GET_MODE_MASK (nmode);
3088   val_so_far &= GET_MODE_MASK (nmode);
3089   gcc_assert (val == val_so_far);
3090
3091   return accum;
3092 }
3093
3094 /* Perform a multiplication and return an rtx for the result.
3095    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3096    TARGET is a suggestion for where to store the result (an rtx).
3097
3098    We check specially for a constant integer as OP1.
3099    If you want this check for OP0 as well, then before calling
3100    you should swap the two operands if OP0 would be constant.  */
3101
3102 rtx
3103 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3104              int unsignedp)
3105 {
3106   enum mult_variant variant;
3107   struct algorithm algorithm;
3108   rtx scalar_op1;
3109   int max_cost;
3110   bool speed = optimize_insn_for_speed_p ();
3111   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3112
3113   if (CONSTANT_P (op0))
3114     std::swap (op0, op1);
3115
3116   /* For vectors, there are several simplifications that can be made if
3117      all elements of the vector constant are identical.  */
3118   scalar_op1 = unwrap_const_vec_duplicate (op1);
3119
3120   if (INTEGRAL_MODE_P (mode))
3121     {
3122       rtx fake_reg;
3123       HOST_WIDE_INT coeff;
3124       bool is_neg;
3125       int mode_bitsize;
3126
3127       if (op1 == CONST0_RTX (mode))
3128         return op1;
3129       if (op1 == CONST1_RTX (mode))
3130         return op0;
3131       if (op1 == CONSTM1_RTX (mode))
3132         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3133                             op0, target, 0);
3134
3135       if (do_trapv)
3136         goto skip_synth;
3137
3138       /* If mode is integer vector mode, check if the backend supports
3139          vector lshift (by scalar or vector) at all.  If not, we can't use
3140          synthetized multiply.  */
3141       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3142           && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3143           && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3144         goto skip_synth;
3145
3146       /* These are the operations that are potentially turned into
3147          a sequence of shifts and additions.  */
3148       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3149
3150       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3151          less than or equal in size to `unsigned int' this doesn't matter.
3152          If the mode is larger than `unsigned int', then synth_mult works
3153          only if the constant value exactly fits in an `unsigned int' without
3154          any truncation.  This means that multiplying by negative values does
3155          not work; results are off by 2^32 on a 32 bit machine.  */
3156       if (CONST_INT_P (scalar_op1))
3157         {
3158           coeff = INTVAL (scalar_op1);
3159           is_neg = coeff < 0;
3160         }
3161 #if TARGET_SUPPORTS_WIDE_INT
3162       else if (CONST_WIDE_INT_P (scalar_op1))
3163 #else
3164       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3165 #endif
3166         {
3167           int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3168           /* Perfect power of 2 (other than 1, which is handled above).  */
3169           if (shift > 0)
3170             return expand_shift (LSHIFT_EXPR, mode, op0,
3171                                  shift, target, unsignedp);
3172           else
3173             goto skip_synth;
3174         }
3175       else
3176         goto skip_synth;
3177
3178       /* We used to test optimize here, on the grounds that it's better to
3179          produce a smaller program when -O is not used.  But this causes
3180          such a terrible slowdown sometimes that it seems better to always
3181          use synth_mult.  */
3182
3183       /* Special case powers of two.  */
3184       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3185           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3186         return expand_shift (LSHIFT_EXPR, mode, op0,
3187                              floor_log2 (coeff), target, unsignedp);
3188
3189       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3190
3191       /* Attempt to handle multiplication of DImode values by negative
3192          coefficients, by performing the multiplication by a positive
3193          multiplier and then inverting the result.  */
3194       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3195         {
3196           /* Its safe to use -coeff even for INT_MIN, as the
3197              result is interpreted as an unsigned coefficient.
3198              Exclude cost of op0 from max_cost to match the cost
3199              calculation of the synth_mult.  */
3200           coeff = -(unsigned HOST_WIDE_INT) coeff;
3201           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3202                                     mode, speed)
3203                       - neg_cost (speed, mode));
3204           if (max_cost <= 0)
3205             goto skip_synth;
3206
3207           /* Special case powers of two.  */
3208           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3209             {
3210               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3211                                        floor_log2 (coeff), target, unsignedp);
3212               return expand_unop (mode, neg_optab, temp, target, 0);
3213             }
3214
3215           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3216                                    max_cost))
3217             {
3218               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3219                                             &algorithm, variant);
3220               return expand_unop (mode, neg_optab, temp, target, 0);
3221             }
3222           goto skip_synth;
3223         }
3224
3225       /* Exclude cost of op0 from max_cost to match the cost
3226          calculation of the synth_mult.  */
3227       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3228       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3229         return expand_mult_const (mode, op0, coeff, target,
3230                                   &algorithm, variant);
3231     }
3232  skip_synth:
3233
3234   /* Expand x*2.0 as x+x.  */
3235   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3236       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3237     {
3238       op0 = force_reg (GET_MODE (op0), op0);
3239       return expand_binop (mode, add_optab, op0, op0,
3240                            target, unsignedp, OPTAB_LIB_WIDEN);
3241     }
3242
3243   /* This used to use umul_optab if unsigned, but for non-widening multiply
3244      there is no difference between signed and unsigned.  */
3245   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3246                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3247   gcc_assert (op0);
3248   return op0;
3249 }
3250
3251 /* Return a cost estimate for multiplying a register by the given
3252    COEFFicient in the given MODE and SPEED.  */
3253
3254 int
3255 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3256 {
3257   int max_cost;
3258   struct algorithm algorithm;
3259   enum mult_variant variant;
3260
3261   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3262   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3263                            mode, speed);
3264   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3265     return algorithm.cost.cost;
3266   else
3267     return max_cost;
3268 }
3269
3270 /* Perform a widening multiplication and return an rtx for the result.
3271    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3272    TARGET is a suggestion for where to store the result (an rtx).
3273    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3274    or smul_widen_optab.
3275
3276    We check specially for a constant integer as OP1, comparing the
3277    cost of a widening multiply against the cost of a sequence of shifts
3278    and adds.  */
3279
3280 rtx
3281 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3282                       int unsignedp, optab this_optab)
3283 {
3284   bool speed = optimize_insn_for_speed_p ();
3285   rtx cop1;
3286
3287   if (CONST_INT_P (op1)
3288       && GET_MODE (op0) != VOIDmode
3289       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3290                                 this_optab == umul_widen_optab))
3291       && CONST_INT_P (cop1)
3292       && (INTVAL (cop1) >= 0
3293           || HWI_COMPUTABLE_MODE_P (mode)))
3294     {
3295       HOST_WIDE_INT coeff = INTVAL (cop1);
3296       int max_cost;
3297       enum mult_variant variant;
3298       struct algorithm algorithm;
3299
3300       if (coeff == 0)
3301         return CONST0_RTX (mode);
3302
3303       /* Special case powers of two.  */
3304       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3305         {
3306           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3307           return expand_shift (LSHIFT_EXPR, mode, op0,
3308                                floor_log2 (coeff), target, unsignedp);
3309         }
3310
3311       /* Exclude cost of op0 from max_cost to match the cost
3312          calculation of the synth_mult.  */
3313       max_cost = mul_widen_cost (speed, mode);
3314       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3315                                max_cost))
3316         {
3317           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3318           return expand_mult_const (mode, op0, coeff, target,
3319                                     &algorithm, variant);
3320         }
3321     }
3322   return expand_binop (mode, this_optab, op0, op1, target,
3323                        unsignedp, OPTAB_LIB_WIDEN);
3324 }
3325 \f
3326 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3327    replace division by D, and put the least significant N bits of the result
3328    in *MULTIPLIER_PTR and return the most significant bit.
3329
3330    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3331    needed precision is in PRECISION (should be <= N).
3332
3333    PRECISION should be as small as possible so this function can choose
3334    multiplier more freely.
3335
3336    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3337    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3338
3339    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3340    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3341
3342 unsigned HOST_WIDE_INT
3343 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3344                    unsigned HOST_WIDE_INT *multiplier_ptr,
3345                    int *post_shift_ptr, int *lgup_ptr)
3346 {
3347   int lgup, post_shift;
3348   int pow, pow2;
3349
3350   /* lgup = ceil(log2(divisor)); */
3351   lgup = ceil_log2 (d);
3352
3353   gcc_assert (lgup <= n);
3354
3355   pow = n + lgup;
3356   pow2 = n + lgup - precision;
3357
3358   /* mlow = 2^(N + lgup)/d */
3359   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3360   wide_int mlow = wi::udiv_trunc (val, d);
3361
3362   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3363   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3364   wide_int mhigh = wi::udiv_trunc (val, d);
3365
3366   /* If precision == N, then mlow, mhigh exceed 2^N
3367      (but they do not exceed 2^(N+1)).  */
3368
3369   /* Reduce to lowest terms.  */
3370   for (post_shift = lgup; post_shift > 0; post_shift--)
3371     {
3372       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3373                                                        HOST_BITS_PER_WIDE_INT);
3374       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3375                                                        HOST_BITS_PER_WIDE_INT);
3376       if (ml_lo >= mh_lo)
3377         break;
3378
3379       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3380       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3381     }
3382
3383   *post_shift_ptr = post_shift;
3384   *lgup_ptr = lgup;
3385   if (n < HOST_BITS_PER_WIDE_INT)
3386     {
3387       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3388       *multiplier_ptr = mhigh.to_uhwi () & mask;
3389       return mhigh.to_uhwi () >= mask;
3390     }
3391   else
3392     {
3393       *multiplier_ptr = mhigh.to_uhwi ();
3394       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3395     }
3396 }
3397
3398 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3399    congruent to 1 (mod 2**N).  */
3400
3401 static unsigned HOST_WIDE_INT
3402 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3403 {
3404   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3405
3406   /* The algorithm notes that the choice y = x satisfies
3407      x*y == 1 mod 2^3, since x is assumed odd.
3408      Each iteration doubles the number of bits of significance in y.  */
3409
3410   unsigned HOST_WIDE_INT mask;
3411   unsigned HOST_WIDE_INT y = x;
3412   int nbit = 3;
3413
3414   mask = (n == HOST_BITS_PER_WIDE_INT
3415           ? ~(unsigned HOST_WIDE_INT) 0
3416           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3417
3418   while (nbit < n)
3419     {
3420       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3421       nbit *= 2;
3422     }
3423   return y;
3424 }
3425
3426 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3427    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3428    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3429    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3430    become signed.
3431
3432    The result is put in TARGET if that is convenient.
3433
3434    MODE is the mode of operation.  */
3435
3436 rtx
3437 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3438                              rtx op1, rtx target, int unsignedp)
3439 {
3440   rtx tem;
3441   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3442
3443   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3444                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3445   tem = expand_and (mode, tem, op1, NULL_RTX);
3446   adj_operand
3447     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3448                      adj_operand);
3449
3450   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3451                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3452   tem = expand_and (mode, tem, op0, NULL_RTX);
3453   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3454                           target);
3455
3456   return target;
3457 }
3458
3459 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3460
3461 static rtx
3462 extract_high_half (machine_mode mode, rtx op)
3463 {
3464   machine_mode wider_mode;
3465
3466   if (mode == word_mode)
3467     return gen_highpart (mode, op);
3468
3469   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3470
3471   wider_mode = GET_MODE_WIDER_MODE (mode);
3472   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3473                      GET_MODE_BITSIZE (mode), 0, 1);
3474   return convert_modes (mode, wider_mode, op, 0);
3475 }
3476
3477 /* Like expmed_mult_highpart, but only consider using a multiplication
3478    optab.  OP1 is an rtx for the constant operand.  */
3479
3480 static rtx
3481 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3482                             rtx target, int unsignedp, int max_cost)
3483 {
3484   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3485   machine_mode wider_mode;
3486   optab moptab;
3487   rtx tem;
3488   int size;
3489   bool speed = optimize_insn_for_speed_p ();
3490
3491   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3492
3493   wider_mode = GET_MODE_WIDER_MODE (mode);
3494   size = GET_MODE_BITSIZE (mode);
3495
3496   /* Firstly, try using a multiplication insn that only generates the needed
3497      high part of the product, and in the sign flavor of unsignedp.  */
3498   if (mul_highpart_cost (speed, mode) < max_cost)
3499     {
3500       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3501       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3502                           unsignedp, OPTAB_DIRECT);
3503       if (tem)
3504         return tem;
3505     }
3506
3507   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3508      Need to adjust the result after the multiplication.  */
3509   if (size - 1 < BITS_PER_WORD
3510       && (mul_highpart_cost (speed, mode)
3511           + 2 * shift_cost (speed, mode, size-1)
3512           + 4 * add_cost (speed, mode) < max_cost))
3513     {
3514       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3515       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3516                           unsignedp, OPTAB_DIRECT);
3517       if (tem)
3518         /* We used the wrong signedness.  Adjust the result.  */
3519         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3520                                             tem, unsignedp);
3521     }
3522
3523   /* Try widening multiplication.  */
3524   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3525   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3526       && mul_widen_cost (speed, wider_mode) < max_cost)
3527     {
3528       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3529                           unsignedp, OPTAB_WIDEN);
3530       if (tem)
3531         return extract_high_half (mode, tem);
3532     }
3533
3534   /* Try widening the mode and perform a non-widening multiplication.  */
3535   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3536       && size - 1 < BITS_PER_WORD
3537       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3538           < max_cost))
3539     {
3540       rtx_insn *insns;
3541       rtx wop0, wop1;
3542
3543       /* We need to widen the operands, for example to ensure the
3544          constant multiplier is correctly sign or zero extended.
3545          Use a sequence to clean-up any instructions emitted by
3546          the conversions if things don't work out.  */
3547       start_sequence ();
3548       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3549       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3550       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3551                           unsignedp, OPTAB_WIDEN);
3552       insns = get_insns ();
3553       end_sequence ();
3554
3555       if (tem)
3556         {
3557           emit_insn (insns);
3558           return extract_high_half (mode, tem);
3559         }
3560     }
3561
3562   /* Try widening multiplication of opposite signedness, and adjust.  */
3563   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3564   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3565       && size - 1 < BITS_PER_WORD
3566       && (mul_widen_cost (speed, wider_mode)
3567           + 2 * shift_cost (speed, mode, size-1)
3568           + 4 * add_cost (speed, mode) < max_cost))
3569     {
3570       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3571                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3572       if (tem != 0)
3573         {
3574           tem = extract_high_half (mode, tem);
3575           /* We used the wrong signedness.  Adjust the result.  */
3576           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3577                                               target, unsignedp);
3578         }
3579     }
3580
3581   return 0;
3582 }
3583
3584 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3585    putting the high half of the result in TARGET if that is convenient,
3586    and return where the result is.  If the operation can not be performed,
3587    0 is returned.
3588
3589    MODE is the mode of operation and result.
3590
3591    UNSIGNEDP nonzero means unsigned multiply.
3592
3593    MAX_COST is the total allowed cost for the expanded RTL.  */
3594
3595 static rtx
3596 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3597                       rtx target, int unsignedp, int max_cost)
3598 {
3599   machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3600   unsigned HOST_WIDE_INT cnst1;
3601   int extra_cost;
3602   bool sign_adjust = false;
3603   enum mult_variant variant;
3604   struct algorithm alg;
3605   rtx tem;
3606   bool speed = optimize_insn_for_speed_p ();
3607
3608   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3609   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3610   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3611
3612   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3613
3614   /* We can't optimize modes wider than BITS_PER_WORD.
3615      ??? We might be able to perform double-word arithmetic if
3616      mode == word_mode, however all the cost calculations in
3617      synth_mult etc. assume single-word operations.  */
3618   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3619     return expmed_mult_highpart_optab (mode, op0, op1, target,
3620                                        unsignedp, max_cost);
3621
3622   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3623
3624   /* Check whether we try to multiply by a negative constant.  */
3625   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3626     {
3627       sign_adjust = true;
3628       extra_cost += add_cost (speed, mode);
3629     }
3630
3631   /* See whether shift/add multiplication is cheap enough.  */
3632   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3633                            max_cost - extra_cost))
3634     {
3635       /* See whether the specialized multiplication optabs are
3636          cheaper than the shift/add version.  */
3637       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3638                                         alg.cost.cost + extra_cost);
3639       if (tem)
3640         return tem;
3641
3642       tem = convert_to_mode (wider_mode, op0, unsignedp);
3643       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3644       tem = extract_high_half (mode, tem);
3645
3646       /* Adjust result for signedness.  */
3647       if (sign_adjust)
3648         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3649
3650       return tem;
3651     }
3652   return expmed_mult_highpart_optab (mode, op0, op1, target,
3653                                      unsignedp, max_cost);
3654 }
3655
3656
3657 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3658
3659 static rtx
3660 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3661 {
3662   rtx result, temp, shift;
3663   rtx_code_label *label;
3664   int logd;
3665   int prec = GET_MODE_PRECISION (mode);
3666
3667   logd = floor_log2 (d);
3668   result = gen_reg_rtx (mode);
3669
3670   /* Avoid conditional branches when they're expensive.  */
3671   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3672       && optimize_insn_for_speed_p ())
3673     {
3674       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3675                                       mode, 0, -1);
3676       if (signmask)
3677         {
3678           HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3679           signmask = force_reg (mode, signmask);
3680           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3681
3682           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3683              which instruction sequence to use.  If logical right shifts
3684              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3685              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3686
3687           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3688           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3689               || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3690                   > COSTS_N_INSNS (2)))
3691             {
3692               temp = expand_binop (mode, xor_optab, op0, signmask,
3693                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3694               temp = expand_binop (mode, sub_optab, temp, signmask,
3695                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3696               temp = expand_binop (mode, and_optab, temp,
3697                                    gen_int_mode (masklow, mode),
3698                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3699               temp = expand_binop (mode, xor_optab, temp, signmask,
3700                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3701               temp = expand_binop (mode, sub_optab, temp, signmask,
3702                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3703             }
3704           else
3705             {
3706               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3707                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3708               signmask = force_reg (mode, signmask);
3709
3710               temp = expand_binop (mode, add_optab, op0, signmask,
3711                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3712               temp = expand_binop (mode, and_optab, temp,
3713                                    gen_int_mode (masklow, mode),
3714                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3715               temp = expand_binop (mode, sub_optab, temp, signmask,
3716                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3717             }
3718           return temp;
3719         }
3720     }
3721
3722   /* Mask contains the mode's signbit and the significant bits of the
3723      modulus.  By including the signbit in the operation, many targets
3724      can avoid an explicit compare operation in the following comparison
3725      against zero.  */
3726   wide_int mask = wi::mask (logd, false, prec);
3727   mask = wi::set_bit (mask, prec - 1);
3728
3729   temp = expand_binop (mode, and_optab, op0,
3730                        immed_wide_int_const (mask, mode),
3731                        result, 1, OPTAB_LIB_WIDEN);
3732   if (temp != result)
3733     emit_move_insn (result, temp);
3734
3735   label = gen_label_rtx ();
3736   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3737
3738   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3739                        0, OPTAB_LIB_WIDEN);
3740
3741   mask = wi::mask (logd, true, prec);
3742   temp = expand_binop (mode, ior_optab, temp,
3743                        immed_wide_int_const (mask, mode),
3744                        result, 1, OPTAB_LIB_WIDEN);
3745   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3746                        0, OPTAB_LIB_WIDEN);
3747   if (temp != result)
3748     emit_move_insn (result, temp);
3749   emit_label (label);
3750   return result;
3751 }
3752
3753 /* Expand signed division of OP0 by a power of two D in mode MODE.
3754    This routine is only called for positive values of D.  */
3755
3756 static rtx
3757 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3758 {
3759   rtx temp;
3760   rtx_code_label *label;
3761   int logd;
3762
3763   logd = floor_log2 (d);
3764
3765   if (d == 2
3766       && BRANCH_COST (optimize_insn_for_speed_p (),
3767                       false) >= 1)
3768     {
3769       temp = gen_reg_rtx (mode);
3770       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3771       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3772                            0, OPTAB_LIB_WIDEN);
3773       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3774     }
3775
3776   if (HAVE_conditional_move
3777       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3778     {
3779       rtx temp2;
3780
3781       start_sequence ();
3782       temp2 = copy_to_mode_reg (mode, op0);
3783       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3784                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3785       temp = force_reg (mode, temp);
3786
3787       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3788       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3789                                      mode, temp, temp2, mode, 0);
3790       if (temp2)
3791         {
3792           rtx_insn *seq = get_insns ();
3793           end_sequence ();
3794           emit_insn (seq);
3795           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3796         }
3797       end_sequence ();
3798     }
3799
3800   if (BRANCH_COST (optimize_insn_for_speed_p (),
3801                    false) >= 2)
3802     {
3803       int ushift = GET_MODE_BITSIZE (mode) - logd;
3804
3805       temp = gen_reg_rtx (mode);
3806       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3807       if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3808           || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3809              > COSTS_N_INSNS (1))
3810         temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3811                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3812       else
3813         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3814                              ushift, NULL_RTX, 1);
3815       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3816                            0, OPTAB_LIB_WIDEN);
3817       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3818     }
3819
3820   label = gen_label_rtx ();
3821   temp = copy_to_mode_reg (mode, op0);
3822   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3823   expand_inc (temp, gen_int_mode (d - 1, mode));
3824   emit_label (label);
3825   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3826 }
3827 \f
3828 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3829    if that is convenient, and returning where the result is.
3830    You may request either the quotient or the remainder as the result;
3831    specify REM_FLAG nonzero to get the remainder.
3832
3833    CODE is the expression code for which kind of division this is;
3834    it controls how rounding is done.  MODE is the machine mode to use.
3835    UNSIGNEDP nonzero means do unsigned division.  */
3836
3837 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3838    and then correct it by or'ing in missing high bits
3839    if result of ANDI is nonzero.
3840    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3841    This could optimize to a bfexts instruction.
3842    But C doesn't use these operations, so their optimizations are
3843    left for later.  */
3844 /* ??? For modulo, we don't actually need the highpart of the first product,
3845    the low part will do nicely.  And for small divisors, the second multiply
3846    can also be a low-part only multiply or even be completely left out.
3847    E.g. to calculate the remainder of a division by 3 with a 32 bit
3848    multiply, multiply with 0x55555556 and extract the upper two bits;
3849    the result is exact for inputs up to 0x1fffffff.
3850    The input range can be reduced by using cross-sum rules.
3851    For odd divisors >= 3, the following table gives right shift counts
3852    so that if a number is shifted by an integer multiple of the given
3853    amount, the remainder stays the same:
3854    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3855    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3856    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3857    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3858    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3859
3860    Cross-sum rules for even numbers can be derived by leaving as many bits
3861    to the right alone as the divisor has zeros to the right.
3862    E.g. if x is an unsigned 32 bit number:
3863    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3864    */
3865
3866 rtx
3867 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3868                rtx op0, rtx op1, rtx target, int unsignedp)
3869 {
3870   machine_mode compute_mode;
3871   rtx tquotient;
3872   rtx quotient = 0, remainder = 0;
3873   rtx_insn *last;
3874   int size;
3875   rtx_insn *insn;
3876   optab optab1, optab2;
3877   int op1_is_constant, op1_is_pow2 = 0;
3878   int max_cost, extra_cost;
3879   static HOST_WIDE_INT last_div_const = 0;
3880   bool speed = optimize_insn_for_speed_p ();
3881
3882   op1_is_constant = CONST_INT_P (op1);
3883   if (op1_is_constant)
3884     {
3885       unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3886       if (unsignedp)
3887         ext_op1 &= GET_MODE_MASK (mode);
3888       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3889                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3890     }
3891
3892   /*
3893      This is the structure of expand_divmod:
3894
3895      First comes code to fix up the operands so we can perform the operations
3896      correctly and efficiently.
3897
3898      Second comes a switch statement with code specific for each rounding mode.
3899      For some special operands this code emits all RTL for the desired
3900      operation, for other cases, it generates only a quotient and stores it in
3901      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3902      to indicate that it has not done anything.
3903
3904      Last comes code that finishes the operation.  If QUOTIENT is set and
3905      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3906      QUOTIENT is not set, it is computed using trunc rounding.
3907
3908      We try to generate special code for division and remainder when OP1 is a
3909      constant.  If |OP1| = 2**n we can use shifts and some other fast
3910      operations.  For other values of OP1, we compute a carefully selected
3911      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3912      by m.
3913
3914      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3915      half of the product.  Different strategies for generating the product are
3916      implemented in expmed_mult_highpart.
3917
3918      If what we actually want is the remainder, we generate that by another
3919      by-constant multiplication and a subtraction.  */
3920
3921   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3922      code below will malfunction if we are, so check here and handle
3923      the special case if so.  */
3924   if (op1 == const1_rtx)
3925     return rem_flag ? const0_rtx : op0;
3926
3927     /* When dividing by -1, we could get an overflow.
3928      negv_optab can handle overflows.  */
3929   if (! unsignedp && op1 == constm1_rtx)
3930     {
3931       if (rem_flag)
3932         return const0_rtx;
3933       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3934                           ? negv_optab : neg_optab, op0, target, 0);
3935     }
3936
3937   if (target
3938       /* Don't use the function value register as a target
3939          since we have to read it as well as write it,
3940          and function-inlining gets confused by this.  */
3941       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3942           /* Don't clobber an operand while doing a multi-step calculation.  */
3943           || ((rem_flag || op1_is_constant)
3944               && (reg_mentioned_p (target, op0)
3945                   || (MEM_P (op0) && MEM_P (target))))
3946           || reg_mentioned_p (target, op1)
3947           || (MEM_P (op1) && MEM_P (target))))
3948     target = 0;
3949
3950   /* Get the mode in which to perform this computation.  Normally it will
3951      be MODE, but sometimes we can't do the desired operation in MODE.
3952      If so, pick a wider mode in which we can do the operation.  Convert
3953      to that mode at the start to avoid repeated conversions.
3954
3955      First see what operations we need.  These depend on the expression
3956      we are evaluating.  (We assume that divxx3 insns exist under the
3957      same conditions that modxx3 insns and that these insns don't normally
3958      fail.  If these assumptions are not correct, we may generate less
3959      efficient code in some cases.)
3960
3961      Then see if we find a mode in which we can open-code that operation
3962      (either a division, modulus, or shift).  Finally, check for the smallest
3963      mode for which we can do the operation with a library call.  */
3964
3965   /* We might want to refine this now that we have division-by-constant
3966      optimization.  Since expmed_mult_highpart tries so many variants, it is
3967      not straightforward to generalize this.  Maybe we should make an array
3968      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3969
3970   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3971             ? (unsignedp ? lshr_optab : ashr_optab)
3972             : (unsignedp ? udiv_optab : sdiv_optab));
3973   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3974             ? optab1
3975             : (unsignedp ? udivmod_optab : sdivmod_optab));
3976
3977   for (compute_mode = mode; compute_mode != VOIDmode;
3978        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3979     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3980         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3981       break;
3982
3983   if (compute_mode == VOIDmode)
3984     for (compute_mode = mode; compute_mode != VOIDmode;
3985          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3986       if (optab_libfunc (optab1, compute_mode)
3987           || optab_libfunc (optab2, compute_mode))
3988         break;
3989
3990   /* If we still couldn't find a mode, use MODE, but expand_binop will
3991      probably die.  */
3992   if (compute_mode == VOIDmode)
3993     compute_mode = mode;
3994
3995   if (target && GET_MODE (target) == compute_mode)
3996     tquotient = target;
3997   else
3998     tquotient = gen_reg_rtx (compute_mode);
3999
4000   size = GET_MODE_BITSIZE (compute_mode);
4001 #if 0
4002   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4003      (mode), and thereby get better code when OP1 is a constant.  Do that
4004      later.  It will require going over all usages of SIZE below.  */
4005   size = GET_MODE_BITSIZE (mode);
4006 #endif
4007
4008   /* Only deduct something for a REM if the last divide done was
4009      for a different constant.   Then set the constant of the last
4010      divide.  */
4011   max_cost = (unsignedp
4012               ? udiv_cost (speed, compute_mode)
4013               : sdiv_cost (speed, compute_mode));
4014   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4015                      && INTVAL (op1) == last_div_const))
4016     max_cost -= (mul_cost (speed, compute_mode)
4017                  + add_cost (speed, compute_mode));
4018
4019   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4020
4021   /* Now convert to the best mode to use.  */
4022   if (compute_mode != mode)
4023     {
4024       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4025       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4026
4027       /* convert_modes may have placed op1 into a register, so we
4028          must recompute the following.  */
4029       op1_is_constant = CONST_INT_P (op1);
4030       op1_is_pow2 = (op1_is_constant
4031                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4032                           || (! unsignedp
4033                               && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4034     }
4035
4036   /* If one of the operands is a volatile MEM, copy it into a register.  */
4037
4038   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4039     op0 = force_reg (compute_mode, op0);
4040   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4041     op1 = force_reg (compute_mode, op1);
4042
4043   /* If we need the remainder or if OP1 is constant, we need to
4044      put OP0 in a register in case it has any queued subexpressions.  */
4045   if (rem_flag || op1_is_constant)
4046     op0 = force_reg (compute_mode, op0);
4047
4048   last = get_last_insn ();
4049
4050   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4051   if (unsignedp)
4052     {
4053       if (code == FLOOR_DIV_EXPR)
4054         code = TRUNC_DIV_EXPR;
4055       if (code == FLOOR_MOD_EXPR)
4056         code = TRUNC_MOD_EXPR;
4057       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4058         code = TRUNC_DIV_EXPR;
4059     }
4060
4061   if (op1 != const0_rtx)
4062     switch (code)
4063       {
4064       case TRUNC_MOD_EXPR:
4065       case TRUNC_DIV_EXPR:
4066         if (op1_is_constant)
4067           {
4068             if (unsignedp)
4069               {
4070                 unsigned HOST_WIDE_INT mh, ml;
4071                 int pre_shift, post_shift;
4072                 int dummy;
4073                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4074                                             & GET_MODE_MASK (compute_mode));
4075
4076                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4077                   {
4078                     pre_shift = floor_log2 (d);
4079                     if (rem_flag)
4080                       {
4081                         unsigned HOST_WIDE_INT mask
4082                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4083                         remainder
4084                           = expand_binop (compute_mode, and_optab, op0,
4085                                           gen_int_mode (mask, compute_mode),
4086                                           remainder, 1,
4087                                           OPTAB_LIB_WIDEN);
4088                         if (remainder)
4089                           return gen_lowpart (mode, remainder);
4090                       }
4091                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4092                                              pre_shift, tquotient, 1);
4093                   }
4094                 else if (size <= HOST_BITS_PER_WIDE_INT)
4095                   {
4096                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4097                       {
4098                         /* Most significant bit of divisor is set; emit an scc
4099                            insn.  */
4100                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4101                                                           compute_mode, 1, 1);
4102                       }
4103                     else
4104                       {
4105                         /* Find a suitable multiplier and right shift count
4106                            instead of multiplying with D.  */
4107
4108                         mh = choose_multiplier (d, size, size,
4109                                                 &ml, &post_shift, &dummy);
4110
4111                         /* If the suggested multiplier is more than SIZE bits,
4112                            we can do better for even divisors, using an
4113                            initial right shift.  */
4114                         if (mh != 0 && (d & 1) == 0)
4115                           {
4116                             pre_shift = floor_log2 (d & -d);
4117                             mh = choose_multiplier (d >> pre_shift, size,
4118                                                     size - pre_shift,
4119                                                     &ml, &post_shift, &dummy);
4120                             gcc_assert (!mh);
4121                           }
4122                         else
4123                           pre_shift = 0;
4124
4125                         if (mh != 0)
4126                           {
4127                             rtx t1, t2, t3, t4;
4128
4129                             if (post_shift - 1 >= BITS_PER_WORD)
4130                               goto fail1;
4131
4132                             extra_cost
4133                               = (shift_cost (speed, compute_mode, post_shift - 1)
4134                                  + shift_cost (speed, compute_mode, 1)
4135                                  + 2 * add_cost (speed, compute_mode));
4136                             t1 = expmed_mult_highpart
4137                               (compute_mode, op0,
4138                                gen_int_mode (ml, compute_mode),
4139                                NULL_RTX, 1, max_cost - extra_cost);
4140                             if (t1 == 0)
4141                               goto fail1;
4142                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4143                                                                op0, t1),
4144                                                 NULL_RTX);
4145                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4146                                                t2, 1, NULL_RTX, 1);
4147                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4148                                                               t1, t3),
4149                                                 NULL_RTX);
4150                             quotient = expand_shift
4151                               (RSHIFT_EXPR, compute_mode, t4,
4152                                post_shift - 1, tquotient, 1);
4153                           }
4154                         else
4155                           {
4156                             rtx t1, t2;
4157
4158                             if (pre_shift >= BITS_PER_WORD
4159                                 || post_shift >= BITS_PER_WORD)
4160                               goto fail1;
4161
4162                             t1 = expand_shift
4163                               (RSHIFT_EXPR, compute_mode, op0,
4164                                pre_shift, NULL_RTX, 1);
4165                             extra_cost
4166                               = (shift_cost (speed, compute_mode, pre_shift)
4167                                  + shift_cost (speed, compute_mode, post_shift));
4168                             t2 = expmed_mult_highpart
4169                               (compute_mode, t1,
4170                                gen_int_mode (ml, compute_mode),
4171                                NULL_RTX, 1, max_cost - extra_cost);
4172                             if (t2 == 0)
4173                               goto fail1;
4174                             quotient = expand_shift
4175                               (RSHIFT_EXPR, compute_mode, t2,
4176                                post_shift, tquotient, 1);
4177                           }
4178                       }
4179                   }
4180                 else            /* Too wide mode to use tricky code */
4181                   break;
4182
4183                 insn = get_last_insn ();
4184                 if (insn != last)
4185                   set_dst_reg_note (insn, REG_EQUAL,
4186                                     gen_rtx_UDIV (compute_mode, op0, op1),
4187                                     quotient);
4188               }
4189             else                /* TRUNC_DIV, signed */
4190               {
4191                 unsigned HOST_WIDE_INT ml;
4192                 int lgup, post_shift;
4193                 rtx mlr;
4194                 HOST_WIDE_INT d = INTVAL (op1);
4195                 unsigned HOST_WIDE_INT abs_d;
4196
4197                 /* Since d might be INT_MIN, we have to cast to
4198                    unsigned HOST_WIDE_INT before negating to avoid
4199                    undefined signed overflow.  */
4200                 abs_d = (d >= 0
4201                          ? (unsigned HOST_WIDE_INT) d
4202                          : - (unsigned HOST_WIDE_INT) d);
4203
4204                 /* n rem d = n rem -d */
4205                 if (rem_flag && d < 0)
4206                   {
4207                     d = abs_d;
4208                     op1 = gen_int_mode (abs_d, compute_mode);
4209                   }
4210
4211                 if (d == 1)
4212                   quotient = op0;
4213                 else if (d == -1)
4214                   quotient = expand_unop (compute_mode, neg_optab, op0,
4215                                           tquotient, 0);
4216                 else if (HOST_BITS_PER_WIDE_INT >= size
4217                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4218                   {
4219                     /* This case is not handled correctly below.  */
4220                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4221                                                 compute_mode, 1, 1);
4222                     if (quotient == 0)
4223                       goto fail1;
4224                   }
4225                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4226                          && (rem_flag
4227                              ? smod_pow2_cheap (speed, compute_mode)
4228                              : sdiv_pow2_cheap (speed, compute_mode))
4229                          /* We assume that cheap metric is true if the
4230                             optab has an expander for this mode.  */
4231                          && ((optab_handler ((rem_flag ? smod_optab
4232                                               : sdiv_optab),
4233                                              compute_mode)
4234                               != CODE_FOR_nothing)
4235                              || (optab_handler (sdivmod_optab,
4236                                                 compute_mode)
4237                                  != CODE_FOR_nothing)))
4238                   ;
4239                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4240                   {
4241                     if (rem_flag)
4242                       {
4243                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4244                         if (remainder)
4245                           return gen_lowpart (mode, remainder);
4246                       }
4247
4248                     if (sdiv_pow2_cheap (speed, compute_mode)
4249                         && ((optab_handler (sdiv_optab, compute_mode)
4250                              != CODE_FOR_nothing)
4251                             || (optab_handler (sdivmod_optab, compute_mode)
4252                                 != CODE_FOR_nothing)))
4253                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4254                                                 compute_mode, op0,
4255                                                 gen_int_mode (abs_d,
4256                                                               compute_mode),
4257                                                 NULL_RTX, 0);
4258                     else
4259                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4260
4261                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4262                        negate the quotient.  */
4263                     if (d < 0)
4264                       {
4265                         insn = get_last_insn ();
4266                         if (insn != last
4267                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4268                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4269                           set_dst_reg_note (insn, REG_EQUAL,
4270                                             gen_rtx_DIV (compute_mode, op0,
4271                                                          gen_int_mode
4272                                                            (abs_d,
4273                                                             compute_mode)),
4274                                             quotient);
4275
4276                         quotient = expand_unop (compute_mode, neg_optab,
4277                                                 quotient, quotient, 0);
4278                       }
4279                   }
4280                 else if (size <= HOST_BITS_PER_WIDE_INT)
4281                   {
4282                     choose_multiplier (abs_d, size, size - 1,
4283                                        &ml, &post_shift, &lgup);
4284                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4285                       {
4286                         rtx t1, t2, t3;
4287
4288                         if (post_shift >= BITS_PER_WORD
4289                             || size - 1 >= BITS_PER_WORD)
4290                           goto fail1;
4291
4292                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4293                                       + shift_cost (speed, compute_mode, size - 1)
4294                                       + add_cost (speed, compute_mode));
4295                         t1 = expmed_mult_highpart
4296                           (compute_mode, op0, gen_int_mode (ml, compute_mode),
4297                            NULL_RTX, 0, max_cost - extra_cost);
4298                         if (t1 == 0)
4299                           goto fail1;
4300                         t2 = expand_shift
4301                           (RSHIFT_EXPR, compute_mode, t1,
4302                            post_shift, NULL_RTX, 0);
4303                         t3 = expand_shift
4304                           (RSHIFT_EXPR, compute_mode, op0,
4305                            size - 1, NULL_RTX, 0);
4306                         if (d < 0)
4307                           quotient
4308                             = force_operand (gen_rtx_MINUS (compute_mode,
4309                                                             t3, t2),
4310                                              tquotient);
4311                         else
4312                           quotient
4313                             = force_operand (gen_rtx_MINUS (compute_mode,
4314                                                             t2, t3),
4315                                              tquotient);
4316                       }
4317                     else
4318                       {
4319                         rtx t1, t2, t3, t4;
4320
4321                         if (post_shift >= BITS_PER_WORD
4322                             || size - 1 >= BITS_PER_WORD)
4323                           goto fail1;
4324
4325                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4326                         mlr = gen_int_mode (ml, compute_mode);
4327                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4328                                       + shift_cost (speed, compute_mode, size - 1)
4329                                       + 2 * add_cost (speed, compute_mode));
4330                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4331                                                    NULL_RTX, 0,
4332                                                    max_cost - extra_cost);
4333                         if (t1 == 0)
4334                           goto fail1;
4335                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4336                                                           t1, op0),
4337                                             NULL_RTX);
4338                         t3 = expand_shift
4339                           (RSHIFT_EXPR, compute_mode, t2,
4340                            post_shift, NULL_RTX, 0);
4341                         t4 = expand_shift
4342                           (RSHIFT_EXPR, compute_mode, op0,
4343                            size - 1, NULL_RTX, 0);
4344                         if (d < 0)
4345                           quotient
4346                             = force_operand (gen_rtx_MINUS (compute_mode,
4347                                                             t4, t3),
4348                                              tquotient);
4349                         else
4350                           quotient
4351                             = force_operand (gen_rtx_MINUS (compute_mode,
4352                                                             t3, t4),
4353                                              tquotient);
4354                       }
4355                   }
4356                 else            /* Too wide mode to use tricky code */
4357                   break;
4358
4359                 insn = get_last_insn ();
4360                 if (insn != last)
4361                   set_dst_reg_note (insn, REG_EQUAL,
4362                                     gen_rtx_DIV (compute_mode, op0, op1),
4363                                     quotient);
4364               }
4365             break;
4366           }
4367       fail1:
4368         delete_insns_since (last);
4369         break;
4370
4371       case FLOOR_DIV_EXPR:
4372       case FLOOR_MOD_EXPR:
4373       /* We will come here only for signed operations.  */
4374         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4375           {
4376             unsigned HOST_WIDE_INT mh, ml;
4377             int pre_shift, lgup, post_shift;
4378             HOST_WIDE_INT d = INTVAL (op1);
4379
4380             if (d > 0)
4381               {
4382                 /* We could just as easily deal with negative constants here,
4383                    but it does not seem worth the trouble for GCC 2.6.  */
4384                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4385                   {
4386                     pre_shift = floor_log2 (d);
4387                     if (rem_flag)
4388                       {
4389                         unsigned HOST_WIDE_INT mask
4390                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4391                         remainder = expand_binop
4392                           (compute_mode, and_optab, op0,
4393                            gen_int_mode (mask, compute_mode),
4394                            remainder, 0, OPTAB_LIB_WIDEN);
4395                         if (remainder)
4396                           return gen_lowpart (mode, remainder);
4397                       }
4398                     quotient = expand_shift
4399                       (RSHIFT_EXPR, compute_mode, op0,
4400                        pre_shift, tquotient, 0);
4401                   }
4402                 else
4403                   {
4404                     rtx t1, t2, t3, t4;
4405
4406                     mh = choose_multiplier (d, size, size - 1,
4407                                             &ml, &post_shift, &lgup);
4408                     gcc_assert (!mh);
4409
4410                     if (post_shift < BITS_PER_WORD
4411                         && size - 1 < BITS_PER_WORD)
4412                       {
4413                         t1 = expand_shift
4414                           (RSHIFT_EXPR, compute_mode, op0,
4415                            size - 1, NULL_RTX, 0);
4416                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4417                                            NULL_RTX, 0, OPTAB_WIDEN);
4418                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4419                                       + shift_cost (speed, compute_mode, size - 1)
4420                                       + 2 * add_cost (speed, compute_mode));
4421                         t3 = expmed_mult_highpart
4422                           (compute_mode, t2, gen_int_mode (ml, compute_mode),
4423                            NULL_RTX, 1, max_cost - extra_cost);
4424                         if (t3 != 0)
4425                           {
4426                             t4 = expand_shift
4427                               (RSHIFT_EXPR, compute_mode, t3,
4428                                post_shift, NULL_RTX, 1);
4429                             quotient = expand_binop (compute_mode, xor_optab,
4430                                                      t4, t1, tquotient, 0,
4431                                                      OPTAB_WIDEN);
4432                           }
4433                       }
4434                   }
4435               }
4436             else
4437               {
4438                 rtx nsign, t1, t2, t3, t4;
4439                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4440                                                   op0, constm1_rtx), NULL_RTX);
4441                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4442                                    0, OPTAB_WIDEN);
4443                 nsign = expand_shift
4444                   (RSHIFT_EXPR, compute_mode, t2,
4445                    size - 1, NULL_RTX, 0);
4446                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4447                                     NULL_RTX);
4448                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4449                                     NULL_RTX, 0);
4450                 if (t4)
4451                   {
4452                     rtx t5;
4453                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4454                                       NULL_RTX, 0);
4455                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4456                                                             t4, t5),
4457                                               tquotient);
4458                   }
4459               }
4460           }
4461
4462         if (quotient != 0)
4463           break;
4464         delete_insns_since (last);
4465
4466         /* Try using an instruction that produces both the quotient and
4467            remainder, using truncation.  We can easily compensate the quotient
4468            or remainder to get floor rounding, once we have the remainder.
4469            Notice that we compute also the final remainder value here,
4470            and return the result right away.  */
4471         if (target == 0 || GET_MODE (target) != compute_mode)
4472           target = gen_reg_rtx (compute_mode);
4473
4474         if (rem_flag)
4475           {
4476             remainder
4477               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4478             quotient = gen_reg_rtx (compute_mode);
4479           }
4480         else
4481           {
4482             quotient
4483               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4484             remainder = gen_reg_rtx (compute_mode);
4485           }
4486
4487         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4488                                  quotient, remainder, 0))
4489           {
4490             /* This could be computed with a branch-less sequence.
4491                Save that for later.  */
4492             rtx tem;
4493             rtx_code_label *label = gen_label_rtx ();
4494             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4495             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4496                                 NULL_RTX, 0, OPTAB_WIDEN);
4497             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4498             expand_dec (quotient, const1_rtx);
4499             expand_inc (remainder, op1);
4500             emit_label (label);
4501             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4502           }
4503
4504         /* No luck with division elimination or divmod.  Have to do it
4505            by conditionally adjusting op0 *and* the result.  */
4506         {
4507           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4508           rtx adjusted_op0;
4509           rtx tem;
4510
4511           quotient = gen_reg_rtx (compute_mode);
4512           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4513           label1 = gen_label_rtx ();
4514           label2 = gen_label_rtx ();
4515           label3 = gen_label_rtx ();
4516           label4 = gen_label_rtx ();
4517           label5 = gen_label_rtx ();
4518           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4519           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4520           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4521                               quotient, 0, OPTAB_LIB_WIDEN);
4522           if (tem != quotient)
4523             emit_move_insn (quotient, tem);
4524           emit_jump_insn (targetm.gen_jump (label5));
4525           emit_barrier ();
4526           emit_label (label1);
4527           expand_inc (adjusted_op0, const1_rtx);
4528           emit_jump_insn (targetm.gen_jump (label4));
4529           emit_barrier ();
4530           emit_label (label2);
4531           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4532           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4533                               quotient, 0, OPTAB_LIB_WIDEN);
4534           if (tem != quotient)
4535             emit_move_insn (quotient, tem);
4536           emit_jump_insn (targetm.gen_jump (label5));
4537           emit_barrier ();
4538           emit_label (label3);
4539           expand_dec (adjusted_op0, const1_rtx);
4540           emit_label (label4);
4541           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4542                               quotient, 0, OPTAB_LIB_WIDEN);
4543           if (tem != quotient)
4544             emit_move_insn (quotient, tem);
4545           expand_dec (quotient, const1_rtx);
4546           emit_label (label5);
4547         }
4548         break;
4549
4550       case CEIL_DIV_EXPR:
4551       case CEIL_MOD_EXPR:
4552         if (unsignedp)
4553           {
4554             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4555               {
4556                 rtx t1, t2, t3;
4557                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4558                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4559                                    floor_log2 (d), tquotient, 1);
4560                 t2 = expand_binop (compute_mode, and_optab, op0,
4561                                    gen_int_mode (d - 1, compute_mode),
4562                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4563                 t3 = gen_reg_rtx (compute_mode);
4564                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4565                                       compute_mode, 1, 1);
4566                 if (t3 == 0)
4567                   {
4568                     rtx_code_label *lab;
4569                     lab = gen_label_rtx ();
4570                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4571                     expand_inc (t1, const1_rtx);
4572                     emit_label (lab);
4573                     quotient = t1;
4574                   }
4575                 else
4576                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4577                                                           t1, t3),
4578                                             tquotient);
4579                 break;
4580               }
4581
4582             /* Try using an instruction that produces both the quotient and
4583                remainder, using truncation.  We can easily compensate the
4584                quotient or remainder to get ceiling rounding, once we have the
4585                remainder.  Notice that we compute also the final remainder
4586                value here, and return the result right away.  */
4587             if (target == 0 || GET_MODE (target) != compute_mode)
4588               target = gen_reg_rtx (compute_mode);
4589
4590             if (rem_flag)
4591               {
4592                 remainder = (REG_P (target)
4593                              ? target : gen_reg_rtx (compute_mode));
4594                 quotient = gen_reg_rtx (compute_mode);
4595               }
4596             else
4597               {
4598                 quotient = (REG_P (target)
4599                             ? target : gen_reg_rtx (compute_mode));
4600                 remainder = gen_reg_rtx (compute_mode);
4601               }
4602
4603             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4604                                      remainder, 1))
4605               {
4606                 /* This could be computed with a branch-less sequence.
4607                    Save that for later.  */
4608                 rtx_code_label *label = gen_label_rtx ();
4609                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4610                                  compute_mode, label);
4611                 expand_inc (quotient, const1_rtx);
4612                 expand_dec (remainder, op1);
4613                 emit_label (label);
4614                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4615               }
4616
4617             /* No luck with division elimination or divmod.  Have to do it
4618                by conditionally adjusting op0 *and* the result.  */
4619             {
4620               rtx_code_label *label1, *label2;
4621               rtx adjusted_op0, tem;
4622
4623               quotient = gen_reg_rtx (compute_mode);
4624               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4625               label1 = gen_label_rtx ();
4626               label2 = gen_label_rtx ();
4627               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4628                                compute_mode, label1);
4629               emit_move_insn  (quotient, const0_rtx);
4630               emit_jump_insn (targetm.gen_jump (label2));
4631               emit_barrier ();
4632               emit_label (label1);
4633               expand_dec (adjusted_op0, const1_rtx);
4634               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4635                                   quotient, 1, OPTAB_LIB_WIDEN);
4636               if (tem != quotient)
4637                 emit_move_insn (quotient, tem);
4638               expand_inc (quotient, const1_rtx);
4639               emit_label (label2);
4640             }
4641           }
4642         else /* signed */
4643           {
4644             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4645                 && INTVAL (op1) >= 0)
4646               {
4647                 /* This is extremely similar to the code for the unsigned case
4648                    above.  For 2.7 we should merge these variants, but for
4649                    2.6.1 I don't want to touch the code for unsigned since that
4650                    get used in C.  The signed case will only be used by other
4651                    languages (Ada).  */
4652
4653                 rtx t1, t2, t3;
4654                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4655                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4656                                    floor_log2 (d), tquotient, 0);
4657                 t2 = expand_binop (compute_mode, and_optab, op0,
4658                                    gen_int_mode (d - 1, compute_mode),
4659                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4660                 t3 = gen_reg_rtx (compute_mode);
4661                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4662                                       compute_mode, 1, 1);
4663                 if (t3 == 0)
4664                   {
4665                     rtx_code_label *lab;
4666                     lab = gen_label_rtx ();
4667                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4668                     expand_inc (t1, const1_rtx);
4669                     emit_label (lab);
4670                     quotient = t1;
4671                   }
4672                 else
4673                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4674                                                           t1, t3),
4675                                             tquotient);
4676                 break;
4677               }
4678
4679             /* Try using an instruction that produces both the quotient and
4680                remainder, using truncation.  We can easily compensate the
4681                quotient or remainder to get ceiling rounding, once we have the
4682                remainder.  Notice that we compute also the final remainder
4683                value here, and return the result right away.  */
4684             if (target == 0 || GET_MODE (target) != compute_mode)
4685               target = gen_reg_rtx (compute_mode);
4686             if (rem_flag)
4687               {
4688                 remainder= (REG_P (target)
4689                             ? target : gen_reg_rtx (compute_mode));
4690                 quotient = gen_reg_rtx (compute_mode);
4691               }
4692             else
4693               {
4694                 quotient = (REG_P (target)
4695                             ? target : gen_reg_rtx (compute_mode));
4696                 remainder = gen_reg_rtx (compute_mode);
4697               }
4698
4699             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4700                                      remainder, 0))
4701               {
4702                 /* This could be computed with a branch-less sequence.
4703                    Save that for later.  */
4704                 rtx tem;
4705                 rtx_code_label *label = gen_label_rtx ();
4706                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4707                                  compute_mode, label);
4708                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4709                                     NULL_RTX, 0, OPTAB_WIDEN);
4710                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4711                 expand_inc (quotient, const1_rtx);
4712                 expand_dec (remainder, op1);
4713                 emit_label (label);
4714                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4715               }
4716
4717             /* No luck with division elimination or divmod.  Have to do it
4718                by conditionally adjusting op0 *and* the result.  */
4719             {
4720               rtx_code_label *label1, *label2, *label3, *label4, *label5;
4721               rtx adjusted_op0;
4722               rtx tem;
4723
4724               quotient = gen_reg_rtx (compute_mode);
4725               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4726               label1 = gen_label_rtx ();
4727               label2 = gen_label_rtx ();
4728               label3 = gen_label_rtx ();
4729               label4 = gen_label_rtx ();
4730               label5 = gen_label_rtx ();
4731               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4732               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4733                                compute_mode, label1);
4734               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4735                                   quotient, 0, OPTAB_LIB_WIDEN);
4736               if (tem != quotient)
4737                 emit_move_insn (quotient, tem);
4738               emit_jump_insn (targetm.gen_jump (label5));
4739               emit_barrier ();
4740               emit_label (label1);
4741               expand_dec (adjusted_op0, const1_rtx);
4742               emit_jump_insn (targetm.gen_jump (label4));
4743               emit_barrier ();
4744               emit_label (label2);
4745               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4746                                compute_mode, label3);
4747               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4748                                   quotient, 0, OPTAB_LIB_WIDEN);
4749               if (tem != quotient)
4750                 emit_move_insn (quotient, tem);
4751               emit_jump_insn (targetm.gen_jump (label5));
4752               emit_barrier ();
4753               emit_label (label3);
4754               expand_inc (adjusted_op0, const1_rtx);
4755               emit_label (label4);
4756               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4757                                   quotient, 0, OPTAB_LIB_WIDEN);
4758               if (tem != quotient)
4759                 emit_move_insn (quotient, tem);
4760               expand_inc (quotient, const1_rtx);
4761               emit_label (label5);
4762             }
4763           }
4764         break;
4765
4766       case EXACT_DIV_EXPR:
4767         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4768           {
4769             HOST_WIDE_INT d = INTVAL (op1);
4770             unsigned HOST_WIDE_INT ml;
4771             int pre_shift;
4772             rtx t1;
4773
4774             pre_shift = floor_log2 (d & -d);
4775             ml = invert_mod2n (d >> pre_shift, size);
4776             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4777                                pre_shift, NULL_RTX, unsignedp);
4778             quotient = expand_mult (compute_mode, t1,
4779                                     gen_int_mode (ml, compute_mode),
4780                                     NULL_RTX, 1);
4781
4782             insn = get_last_insn ();
4783             set_dst_reg_note (insn, REG_EQUAL,
4784                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4785                                               compute_mode, op0, op1),
4786                               quotient);
4787           }
4788         break;
4789
4790       case ROUND_DIV_EXPR:
4791       case ROUND_MOD_EXPR:
4792         if (unsignedp)
4793           {
4794             rtx tem;
4795             rtx_code_label *label;
4796             label = gen_label_rtx ();
4797             quotient = gen_reg_rtx (compute_mode);
4798             remainder = gen_reg_rtx (compute_mode);
4799             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4800               {
4801                 rtx tem;
4802                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4803                                          quotient, 1, OPTAB_LIB_WIDEN);
4804                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4805                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4806                                           remainder, 1, OPTAB_LIB_WIDEN);
4807               }
4808             tem = plus_constant (compute_mode, op1, -1);
4809             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4810             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4811             expand_inc (quotient, const1_rtx);
4812             expand_dec (remainder, op1);
4813             emit_label (label);
4814           }
4815         else
4816           {
4817             rtx abs_rem, abs_op1, tem, mask;
4818             rtx_code_label *label;
4819             label = gen_label_rtx ();
4820             quotient = gen_reg_rtx (compute_mode);
4821             remainder = gen_reg_rtx (compute_mode);
4822             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4823               {
4824                 rtx tem;
4825                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4826                                          quotient, 0, OPTAB_LIB_WIDEN);
4827                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4828                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4829                                           remainder, 0, OPTAB_LIB_WIDEN);
4830               }
4831             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4832             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4833             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4834                                 1, NULL_RTX, 1);
4835             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4836             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4837                                 NULL_RTX, 0, OPTAB_WIDEN);
4838             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4839                                  size - 1, NULL_RTX, 0);
4840             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4841                                 NULL_RTX, 0, OPTAB_WIDEN);
4842             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4843                                 NULL_RTX, 0, OPTAB_WIDEN);
4844             expand_inc (quotient, tem);
4845             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4846                                 NULL_RTX, 0, OPTAB_WIDEN);
4847             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4848                                 NULL_RTX, 0, OPTAB_WIDEN);
4849             expand_dec (remainder, tem);
4850             emit_label (label);
4851           }
4852         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4853
4854       default:
4855         gcc_unreachable ();
4856       }
4857
4858   if (quotient == 0)
4859     {
4860       if (target && GET_MODE (target) != compute_mode)
4861         target = 0;
4862
4863       if (rem_flag)
4864         {
4865           /* Try to produce the remainder without producing the quotient.
4866              If we seem to have a divmod pattern that does not require widening,
4867              don't try widening here.  We should really have a WIDEN argument
4868              to expand_twoval_binop, since what we'd really like to do here is
4869              1) try a mod insn in compute_mode
4870              2) try a divmod insn in compute_mode
4871              3) try a div insn in compute_mode and multiply-subtract to get
4872                 remainder
4873              4) try the same things with widening allowed.  */
4874           remainder
4875             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4876                                  op0, op1, target,
4877                                  unsignedp,
4878                                  ((optab_handler (optab2, compute_mode)
4879                                    != CODE_FOR_nothing)
4880                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4881           if (remainder == 0)
4882             {
4883               /* No luck there.  Can we do remainder and divide at once
4884                  without a library call?  */
4885               remainder = gen_reg_rtx (compute_mode);
4886               if (! expand_twoval_binop ((unsignedp
4887                                           ? udivmod_optab
4888                                           : sdivmod_optab),
4889                                          op0, op1,
4890                                          NULL_RTX, remainder, unsignedp))
4891                 remainder = 0;
4892             }
4893
4894           if (remainder)
4895             return gen_lowpart (mode, remainder);
4896         }
4897
4898       /* Produce the quotient.  Try a quotient insn, but not a library call.
4899          If we have a divmod in this mode, use it in preference to widening
4900          the div (for this test we assume it will not fail). Note that optab2
4901          is set to the one of the two optabs that the call below will use.  */
4902       quotient
4903         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4904                              op0, op1, rem_flag ? NULL_RTX : target,
4905                              unsignedp,
4906                              ((optab_handler (optab2, compute_mode)
4907                                != CODE_FOR_nothing)
4908                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4909
4910       if (quotient == 0)
4911         {
4912           /* No luck there.  Try a quotient-and-remainder insn,
4913              keeping the quotient alone.  */
4914           quotient = gen_reg_rtx (compute_mode);
4915           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4916                                      op0, op1,
4917                                      quotient, NULL_RTX, unsignedp))
4918             {
4919               quotient = 0;
4920               if (! rem_flag)
4921                 /* Still no luck.  If we are not computing the remainder,
4922                    use a library call for the quotient.  */
4923                 quotient = sign_expand_binop (compute_mode,
4924                                               udiv_optab, sdiv_optab,
4925                                               op0, op1, target,
4926                                               unsignedp, OPTAB_LIB_WIDEN);
4927             }
4928         }
4929     }
4930
4931   if (rem_flag)
4932     {
4933       if (target && GET_MODE (target) != compute_mode)
4934         target = 0;
4935
4936       if (quotient == 0)
4937         {
4938           /* No divide instruction either.  Use library for remainder.  */
4939           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4940                                          op0, op1, target,
4941                                          unsignedp, OPTAB_LIB_WIDEN);
4942           /* No remainder function.  Try a quotient-and-remainder
4943              function, keeping the remainder.  */
4944           if (!remainder)
4945             {
4946               remainder = gen_reg_rtx (compute_mode);
4947               if (!expand_twoval_binop_libfunc
4948                   (unsignedp ? udivmod_optab : sdivmod_optab,
4949                    op0, op1,
4950                    NULL_RTX, remainder,
4951                    unsignedp ? UMOD : MOD))
4952                 remainder = NULL_RTX;
4953             }
4954         }
4955       else
4956         {
4957           /* We divided.  Now finish doing X - Y * (X / Y).  */
4958           remainder = expand_mult (compute_mode, quotient, op1,
4959                                    NULL_RTX, unsignedp);
4960           remainder = expand_binop (compute_mode, sub_optab, op0,
4961                                     remainder, target, unsignedp,
4962                                     OPTAB_LIB_WIDEN);
4963         }
4964     }
4965
4966   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4967 }
4968 \f
4969 /* Return a tree node with data type TYPE, describing the value of X.
4970    Usually this is an VAR_DECL, if there is no obvious better choice.
4971    X may be an expression, however we only support those expressions
4972    generated by loop.c.  */
4973
4974 tree
4975 make_tree (tree type, rtx x)
4976 {
4977   tree t;
4978
4979   switch (GET_CODE (x))
4980     {
4981     case CONST_INT:
4982     case CONST_WIDE_INT:
4983       t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
4984       return t;
4985
4986     case CONST_DOUBLE:
4987       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
4988       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
4989         t = wide_int_to_tree (type,
4990                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
4991                                                     HOST_BITS_PER_WIDE_INT * 2));
4992       else
4993         t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
4994
4995       return t;
4996
4997     case CONST_VECTOR:
4998       {
4999         int units = CONST_VECTOR_NUNITS (x);
5000         tree itype = TREE_TYPE (type);
5001         tree *elts;
5002         int i;
5003
5004         /* Build a tree with vector elements.  */
5005         elts = XALLOCAVEC (tree, units);
5006         for (i = units - 1; i >= 0; --i)
5007           {
5008             rtx elt = CONST_VECTOR_ELT (x, i);
5009             elts[i] = make_tree (itype, elt);
5010           }
5011
5012         return build_vector (type, elts);
5013       }
5014
5015     case PLUS:
5016       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5017                           make_tree (type, XEXP (x, 1)));
5018
5019     case MINUS:
5020       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5021                           make_tree (type, XEXP (x, 1)));
5022
5023     case NEG:
5024       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5025
5026     case MULT:
5027       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5028                           make_tree (type, XEXP (x, 1)));
5029
5030     case ASHIFT:
5031       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5032                           make_tree (type, XEXP (x, 1)));
5033
5034     case LSHIFTRT:
5035       t = unsigned_type_for (type);
5036       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5037                                          make_tree (t, XEXP (x, 0)),
5038                                          make_tree (type, XEXP (x, 1))));
5039
5040     case ASHIFTRT:
5041       t = signed_type_for (type);
5042       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5043                                          make_tree (t, XEXP (x, 0)),
5044                                          make_tree (type, XEXP (x, 1))));
5045
5046     case DIV:
5047       if (TREE_CODE (type) != REAL_TYPE)
5048         t = signed_type_for (type);
5049       else
5050         t = type;
5051
5052       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5053                                          make_tree (t, XEXP (x, 0)),
5054                                          make_tree (t, XEXP (x, 1))));
5055     case UDIV:
5056       t = unsigned_type_for (type);
5057       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5058                                          make_tree (t, XEXP (x, 0)),
5059                                          make_tree (t, XEXP (x, 1))));
5060
5061     case SIGN_EXTEND:
5062     case ZERO_EXTEND:
5063       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5064                                           GET_CODE (x) == ZERO_EXTEND);
5065       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5066
5067     case CONST:
5068       return make_tree (type, XEXP (x, 0));
5069
5070     case SYMBOL_REF:
5071       t = SYMBOL_REF_DECL (x);
5072       if (t)
5073         return fold_convert (type, build_fold_addr_expr (t));
5074       /* else fall through.  */
5075
5076     default:
5077       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5078
5079       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5080          address mode to pointer mode.  */
5081       if (POINTER_TYPE_P (type))
5082         x = convert_memory_address_addr_space
5083               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5084
5085       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5086          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5087       t->decl_with_rtl.rtl = x;
5088
5089       return t;
5090     }
5091 }
5092 \f
5093 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5094    and returning TARGET.
5095
5096    If TARGET is 0, a pseudo-register or constant is returned.  */
5097
5098 rtx
5099 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5100 {
5101   rtx tem = 0;
5102
5103   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5104     tem = simplify_binary_operation (AND, mode, op0, op1);
5105   if (tem == 0)
5106     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5107
5108   if (target == 0)
5109     target = tem;
5110   else if (tem != target)
5111     emit_move_insn (target, tem);
5112   return target;
5113 }
5114
5115 /* Helper function for emit_store_flag.  */
5116 rtx
5117 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5118              machine_mode mode, machine_mode compare_mode,
5119              int unsignedp, rtx x, rtx y, int normalizep,
5120              machine_mode target_mode)
5121 {
5122   struct expand_operand ops[4];
5123   rtx op0, comparison, subtarget;
5124   rtx_insn *last;
5125   machine_mode result_mode = targetm.cstore_mode (icode);
5126
5127   last = get_last_insn ();
5128   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5129   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5130   if (!x || !y)
5131     {
5132       delete_insns_since (last);
5133       return NULL_RTX;
5134     }
5135
5136   if (target_mode == VOIDmode)
5137     target_mode = result_mode;
5138   if (!target)
5139     target = gen_reg_rtx (target_mode);
5140
5141   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5142
5143   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5144   create_fixed_operand (&ops[1], comparison);
5145   create_fixed_operand (&ops[2], x);
5146   create_fixed_operand (&ops[3], y);
5147   if (!maybe_expand_insn (icode, 4, ops))
5148     {
5149       delete_insns_since (last);
5150       return NULL_RTX;
5151     }
5152   subtarget = ops[0].value;
5153
5154   /* If we are converting to a wider mode, first convert to
5155      TARGET_MODE, then normalize.  This produces better combining
5156      opportunities on machines that have a SIGN_EXTRACT when we are
5157      testing a single bit.  This mostly benefits the 68k.
5158
5159      If STORE_FLAG_VALUE does not have the sign bit set when
5160      interpreted in MODE, we can do this conversion as unsigned, which
5161      is usually more efficient.  */
5162   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5163     {
5164       convert_move (target, subtarget,
5165                     val_signbit_known_clear_p (result_mode,
5166                                                STORE_FLAG_VALUE));
5167       op0 = target;
5168       result_mode = target_mode;
5169     }
5170   else
5171     op0 = subtarget;
5172
5173   /* If we want to keep subexpressions around, don't reuse our last
5174      target.  */
5175   if (optimize)
5176     subtarget = 0;
5177
5178   /* Now normalize to the proper value in MODE.  Sometimes we don't
5179      have to do anything.  */
5180   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5181     ;
5182   /* STORE_FLAG_VALUE might be the most negative number, so write
5183      the comparison this way to avoid a compiler-time warning.  */
5184   else if (- normalizep == STORE_FLAG_VALUE)
5185     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5186
5187   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5188      it hard to use a value of just the sign bit due to ANSI integer
5189      constant typing rules.  */
5190   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5191     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5192                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5193                         normalizep == 1);
5194   else
5195     {
5196       gcc_assert (STORE_FLAG_VALUE & 1);
5197
5198       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5199       if (normalizep == -1)
5200         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5201     }
5202
5203   /* If we were converting to a smaller mode, do the conversion now.  */
5204   if (target_mode != result_mode)
5205     {
5206       convert_move (target, op0, 0);
5207       return target;
5208     }
5209   else
5210     return op0;
5211 }
5212
5213
5214 /* A subroutine of emit_store_flag only including "tricks" that do not
5215    need a recursive call.  These are kept separate to avoid infinite
5216    loops.  */
5217
5218 static rtx
5219 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5220                    machine_mode mode, int unsignedp, int normalizep,
5221                    machine_mode target_mode)
5222 {
5223   rtx subtarget;
5224   enum insn_code icode;
5225   machine_mode compare_mode;
5226   enum mode_class mclass;
5227   enum rtx_code scode;
5228
5229   if (unsignedp)
5230     code = unsigned_condition (code);
5231   scode = swap_condition (code);
5232
5233   /* If one operand is constant, make it the second one.  Only do this
5234      if the other operand is not constant as well.  */
5235
5236   if (swap_commutative_operands_p (op0, op1))
5237     {
5238       std::swap (op0, op1);
5239       code = swap_condition (code);
5240     }
5241
5242   if (mode == VOIDmode)
5243     mode = GET_MODE (op0);
5244
5245   /* For some comparisons with 1 and -1, we can convert this to
5246      comparisons with zero.  This will often produce more opportunities for
5247      store-flag insns.  */
5248
5249   switch (code)
5250     {
5251     case LT:
5252       if (op1 == const1_rtx)
5253         op1 = const0_rtx, code = LE;
5254       break;
5255     case LE:
5256       if (op1 == constm1_rtx)
5257         op1 = const0_rtx, code = LT;
5258       break;
5259     case GE:
5260       if (op1 == const1_rtx)
5261         op1 = const0_rtx, code = GT;
5262       break;
5263     case GT:
5264       if (op1 == constm1_rtx)
5265         op1 = const0_rtx, code = GE;
5266       break;
5267     case GEU:
5268       if (op1 == const1_rtx)
5269         op1 = const0_rtx, code = NE;
5270       break;
5271     case LTU:
5272       if (op1 == const1_rtx)
5273         op1 = const0_rtx, code = EQ;
5274       break;
5275     default:
5276       break;
5277     }
5278
5279   /* If we are comparing a double-word integer with zero or -1, we can
5280      convert the comparison into one involving a single word.  */
5281   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5282       && GET_MODE_CLASS (mode) == MODE_INT
5283       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5284     {
5285       rtx tem;
5286       if ((code == EQ || code == NE)
5287           && (op1 == const0_rtx || op1 == constm1_rtx))
5288         {
5289           rtx op00, op01;
5290
5291           /* Do a logical OR or AND of the two words and compare the
5292              result.  */
5293           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5294           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5295           tem = expand_binop (word_mode,
5296                               op1 == const0_rtx ? ior_optab : and_optab,
5297                               op00, op01, NULL_RTX, unsignedp,
5298                               OPTAB_DIRECT);
5299
5300           if (tem != 0)
5301             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5302                                    unsignedp, normalizep);
5303         }
5304       else if ((code == LT || code == GE) && op1 == const0_rtx)
5305         {
5306           rtx op0h;
5307
5308           /* If testing the sign bit, can just test on high word.  */
5309           op0h = simplify_gen_subreg (word_mode, op0, mode,
5310                                       subreg_highpart_offset (word_mode,
5311                                                               mode));
5312           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5313                                  unsignedp, normalizep);
5314         }
5315       else
5316         tem = NULL_RTX;
5317
5318       if (tem)
5319         {
5320           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5321             return tem;
5322           if (!target)
5323             target = gen_reg_rtx (target_mode);
5324
5325           convert_move (target, tem,
5326                         !val_signbit_known_set_p (word_mode,
5327                                                   (normalizep ? normalizep
5328                                                    : STORE_FLAG_VALUE)));
5329           return target;
5330         }
5331     }
5332
5333   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5334      complement of A (for GE) and shifting the sign bit to the low bit.  */
5335   if (op1 == const0_rtx && (code == LT || code == GE)
5336       && GET_MODE_CLASS (mode) == MODE_INT
5337       && (normalizep || STORE_FLAG_VALUE == 1
5338           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5339     {
5340       subtarget = target;
5341
5342       if (!target)
5343         target_mode = mode;
5344
5345       /* If the result is to be wider than OP0, it is best to convert it
5346          first.  If it is to be narrower, it is *incorrect* to convert it
5347          first.  */
5348       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5349         {
5350           op0 = convert_modes (target_mode, mode, op0, 0);
5351           mode = target_mode;
5352         }
5353
5354       if (target_mode != mode)
5355         subtarget = 0;
5356
5357       if (code == GE)
5358         op0 = expand_unop (mode, one_cmpl_optab, op0,
5359                            ((STORE_FLAG_VALUE == 1 || normalizep)
5360                             ? 0 : subtarget), 0);
5361
5362       if (STORE_FLAG_VALUE == 1 || normalizep)
5363         /* If we are supposed to produce a 0/1 value, we want to do
5364            a logical shift from the sign bit to the low-order bit; for
5365            a -1/0 value, we do an arithmetic shift.  */
5366         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5367                             GET_MODE_BITSIZE (mode) - 1,
5368                             subtarget, normalizep != -1);
5369
5370       if (mode != target_mode)
5371         op0 = convert_modes (target_mode, mode, op0, 0);
5372
5373       return op0;
5374     }
5375
5376   mclass = GET_MODE_CLASS (mode);
5377   for (compare_mode = mode; compare_mode != VOIDmode;
5378        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5379     {
5380      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5381      icode = optab_handler (cstore_optab, optab_mode);
5382      if (icode != CODE_FOR_nothing)
5383         {
5384           do_pending_stack_adjust ();
5385           rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5386                                  unsignedp, op0, op1, normalizep, target_mode);
5387           if (tem)
5388             return tem;
5389
5390           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5391             {
5392               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5393                                  unsignedp, op1, op0, normalizep, target_mode);
5394               if (tem)
5395                 return tem;
5396             }
5397           break;
5398         }
5399     }
5400
5401   return 0;
5402 }
5403
5404 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5405    and storing in TARGET.  Normally return TARGET.
5406    Return 0 if that cannot be done.
5407
5408    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5409    it is VOIDmode, they cannot both be CONST_INT.
5410
5411    UNSIGNEDP is for the case where we have to widen the operands
5412    to perform the operation.  It says to use zero-extension.
5413
5414    NORMALIZEP is 1 if we should convert the result to be either zero
5415    or one.  Normalize is -1 if we should convert the result to be
5416    either zero or -1.  If NORMALIZEP is zero, the result will be left
5417    "raw" out of the scc insn.  */
5418
5419 rtx
5420 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5421                  machine_mode mode, int unsignedp, int normalizep)
5422 {
5423   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5424   enum rtx_code rcode;
5425   rtx subtarget;
5426   rtx tem, trueval;
5427   rtx_insn *last;
5428
5429   /* If we compare constants, we shouldn't use a store-flag operation,
5430      but a constant load.  We can get there via the vanilla route that
5431      usually generates a compare-branch sequence, but will in this case
5432      fold the comparison to a constant, and thus elide the branch.  */
5433   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5434     return NULL_RTX;
5435
5436   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5437                            target_mode);
5438   if (tem)
5439     return tem;
5440
5441   /* If we reached here, we can't do this with a scc insn, however there
5442      are some comparisons that can be done in other ways.  Don't do any
5443      of these cases if branches are very cheap.  */
5444   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5445     return 0;
5446
5447   /* See what we need to return.  We can only return a 1, -1, or the
5448      sign bit.  */
5449
5450   if (normalizep == 0)
5451     {
5452       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5453         normalizep = STORE_FLAG_VALUE;
5454
5455       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5456         ;
5457       else
5458         return 0;
5459     }
5460
5461   last = get_last_insn ();
5462
5463   /* If optimizing, use different pseudo registers for each insn, instead
5464      of reusing the same pseudo.  This leads to better CSE, but slows
5465      down the compiler, since there are more pseudos */
5466   subtarget = (!optimize
5467                && (target_mode == mode)) ? target : NULL_RTX;
5468   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5469
5470   /* For floating-point comparisons, try the reverse comparison or try
5471      changing the "orderedness" of the comparison.  */
5472   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5473     {
5474       enum rtx_code first_code;
5475       bool and_them;
5476
5477       rcode = reverse_condition_maybe_unordered (code);
5478       if (can_compare_p (rcode, mode, ccp_store_flag)
5479           && (code == ORDERED || code == UNORDERED
5480               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5481               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5482         {
5483           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5484                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5485
5486           /* For the reverse comparison, use either an addition or a XOR.  */
5487           if (want_add
5488               && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5489                            optimize_insn_for_speed_p ()) == 0)
5490             {
5491               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5492                                        STORE_FLAG_VALUE, target_mode);
5493               if (tem)
5494                 return expand_binop (target_mode, add_optab, tem,
5495                                      gen_int_mode (normalizep, target_mode),
5496                                      target, 0, OPTAB_WIDEN);
5497             }
5498           else if (!want_add
5499                    && rtx_cost (trueval, mode, XOR, 1,
5500                                 optimize_insn_for_speed_p ()) == 0)
5501             {
5502               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5503                                        normalizep, target_mode);
5504               if (tem)
5505                 return expand_binop (target_mode, xor_optab, tem, trueval,
5506                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5507             }
5508         }
5509
5510       delete_insns_since (last);
5511
5512       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5513       if (code == ORDERED || code == UNORDERED)
5514         return 0;
5515
5516       and_them = split_comparison (code, mode, &first_code, &code);
5517
5518       /* If there are no NaNs, the first comparison should always fall through.
5519          Effectively change the comparison to the other one.  */
5520       if (!HONOR_NANS (mode))
5521         {
5522           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5523           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5524                                     target_mode);
5525         }
5526
5527       if (!HAVE_conditional_move)
5528         return 0;
5529
5530       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5531          conditional move.  */
5532       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5533                                normalizep, target_mode);
5534       if (tem == 0)
5535         return 0;
5536
5537       if (and_them)
5538         tem = emit_conditional_move (target, code, op0, op1, mode,
5539                                      tem, const0_rtx, GET_MODE (tem), 0);
5540       else
5541         tem = emit_conditional_move (target, code, op0, op1, mode,
5542                                      trueval, tem, GET_MODE (tem), 0);
5543
5544       if (tem == 0)
5545         delete_insns_since (last);
5546       return tem;
5547     }
5548
5549   /* The remaining tricks only apply to integer comparisons.  */
5550
5551   if (GET_MODE_CLASS (mode) != MODE_INT)
5552     return 0;
5553
5554   /* If this is an equality comparison of integers, we can try to exclusive-or
5555      (or subtract) the two operands and use a recursive call to try the
5556      comparison with zero.  Don't do any of these cases if branches are
5557      very cheap.  */
5558
5559   if ((code == EQ || code == NE) && op1 != const0_rtx)
5560     {
5561       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5562                           OPTAB_WIDEN);
5563
5564       if (tem == 0)
5565         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5566                             OPTAB_WIDEN);
5567       if (tem != 0)
5568         tem = emit_store_flag (target, code, tem, const0_rtx,
5569                                mode, unsignedp, normalizep);
5570       if (tem != 0)
5571         return tem;
5572
5573       delete_insns_since (last);
5574     }
5575
5576   /* For integer comparisons, try the reverse comparison.  However, for
5577      small X and if we'd have anyway to extend, implementing "X != 0"
5578      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5579   rcode = reverse_condition (code);
5580   if (can_compare_p (rcode, mode, ccp_store_flag)
5581       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5582             && code == NE
5583             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5584             && op1 == const0_rtx))
5585     {
5586       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5587                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5588
5589       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5590       if (want_add
5591           && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5592                        optimize_insn_for_speed_p ()) == 0)
5593         {
5594           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5595                                    STORE_FLAG_VALUE, target_mode);
5596           if (tem != 0)
5597             tem = expand_binop (target_mode, add_optab, tem,
5598                                 gen_int_mode (normalizep, target_mode),
5599                                 target, 0, OPTAB_WIDEN);
5600         }
5601       else if (!want_add
5602                && rtx_cost (trueval, mode, XOR, 1,
5603                             optimize_insn_for_speed_p ()) == 0)
5604         {
5605           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5606                                    normalizep, target_mode);
5607           if (tem != 0)
5608             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5609                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5610         }
5611
5612       if (tem != 0)
5613         return tem;
5614       delete_insns_since (last);
5615     }
5616
5617   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5618      the constant zero.  Reject all other comparisons at this point.  Only
5619      do LE and GT if branches are expensive since they are expensive on
5620      2-operand machines.  */
5621
5622   if (op1 != const0_rtx
5623       || (code != EQ && code != NE
5624           && (BRANCH_COST (optimize_insn_for_speed_p (),
5625                            false) <= 1 || (code != LE && code != GT))))
5626     return 0;
5627
5628   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5629      do the necessary operation below.  */
5630
5631   tem = 0;
5632
5633   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5634      the sign bit set.  */
5635
5636   if (code == LE)
5637     {
5638       /* This is destructive, so SUBTARGET can't be OP0.  */
5639       if (rtx_equal_p (subtarget, op0))
5640         subtarget = 0;
5641
5642       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5643                           OPTAB_WIDEN);
5644       if (tem)
5645         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5646                             OPTAB_WIDEN);
5647     }
5648
5649   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5650      number of bits in the mode of OP0, minus one.  */
5651
5652   if (code == GT)
5653     {
5654       if (rtx_equal_p (subtarget, op0))
5655         subtarget = 0;
5656
5657       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5658                           GET_MODE_BITSIZE (mode) - 1,
5659                           subtarget, 0);
5660       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5661                           OPTAB_WIDEN);
5662     }
5663
5664   if (code == EQ || code == NE)
5665     {
5666       /* For EQ or NE, one way to do the comparison is to apply an operation
5667          that converts the operand into a positive number if it is nonzero
5668          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5669          for NE we negate.  This puts the result in the sign bit.  Then we
5670          normalize with a shift, if needed.
5671
5672          Two operations that can do the above actions are ABS and FFS, so try
5673          them.  If that doesn't work, and MODE is smaller than a full word,
5674          we can use zero-extension to the wider mode (an unsigned conversion)
5675          as the operation.  */
5676
5677       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5678          that is compensated by the subsequent overflow when subtracting
5679          one / negating.  */
5680
5681       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5682         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5683       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5684         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5685       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5686         {
5687           tem = convert_modes (word_mode, mode, op0, 1);
5688           mode = word_mode;
5689         }
5690
5691       if (tem != 0)
5692         {
5693           if (code == EQ)
5694             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5695                                 0, OPTAB_WIDEN);
5696           else
5697             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5698         }
5699
5700       /* If we couldn't do it that way, for NE we can "or" the two's complement
5701          of the value with itself.  For EQ, we take the one's complement of
5702          that "or", which is an extra insn, so we only handle EQ if branches
5703          are expensive.  */
5704
5705       if (tem == 0
5706           && (code == NE
5707               || BRANCH_COST (optimize_insn_for_speed_p (),
5708                               false) > 1))
5709         {
5710           if (rtx_equal_p (subtarget, op0))
5711             subtarget = 0;
5712
5713           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5714           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5715                               OPTAB_WIDEN);
5716
5717           if (tem && code == EQ)
5718             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5719         }
5720     }
5721
5722   if (tem && normalizep)
5723     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5724                         GET_MODE_BITSIZE (mode) - 1,
5725                         subtarget, normalizep == 1);
5726
5727   if (tem)
5728     {
5729       if (!target)
5730         ;
5731       else if (GET_MODE (tem) != target_mode)
5732         {
5733           convert_move (target, tem, 0);
5734           tem = target;
5735         }
5736       else if (!subtarget)
5737         {
5738           emit_move_insn (target, tem);
5739           tem = target;
5740         }
5741     }
5742   else
5743     delete_insns_since (last);
5744
5745   return tem;
5746 }
5747
5748 /* Like emit_store_flag, but always succeeds.  */
5749
5750 rtx
5751 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5752                        machine_mode mode, int unsignedp, int normalizep)
5753 {
5754   rtx tem;
5755   rtx_code_label *label;
5756   rtx trueval, falseval;
5757
5758   /* First see if emit_store_flag can do the job.  */
5759   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5760   if (tem != 0)
5761     return tem;
5762
5763   if (!target)
5764     target = gen_reg_rtx (word_mode);
5765
5766   /* If this failed, we have to do this with set/compare/jump/set code.
5767      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5768   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5769   if (code == NE
5770       && GET_MODE_CLASS (mode) == MODE_INT
5771       && REG_P (target)
5772       && op0 == target
5773       && op1 == const0_rtx)
5774     {
5775       label = gen_label_rtx ();
5776       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5777                                NULL_RTX, NULL, label, -1);
5778       emit_move_insn (target, trueval);
5779       emit_label (label);
5780       return target;
5781     }
5782
5783   if (!REG_P (target)
5784       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5785     target = gen_reg_rtx (GET_MODE (target));
5786
5787   /* Jump in the right direction if the target cannot implement CODE
5788      but can jump on its reverse condition.  */
5789   falseval = const0_rtx;
5790   if (! can_compare_p (code, mode, ccp_jump)
5791       && (! FLOAT_MODE_P (mode)
5792           || code == ORDERED || code == UNORDERED
5793           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5794           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5795     {
5796       enum rtx_code rcode;
5797       if (FLOAT_MODE_P (mode))
5798         rcode = reverse_condition_maybe_unordered (code);
5799       else
5800         rcode = reverse_condition (code);
5801
5802       /* Canonicalize to UNORDERED for the libcall.  */
5803       if (can_compare_p (rcode, mode, ccp_jump)
5804           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5805         {
5806           falseval = trueval;
5807           trueval = const0_rtx;
5808           code = rcode;
5809         }
5810     }
5811
5812   emit_move_insn (target, trueval);
5813   label = gen_label_rtx ();
5814   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5815                            label, -1);
5816
5817   emit_move_insn (target, falseval);
5818   emit_label (label);
5819
5820   return target;
5821 }
5822 \f
5823 /* Perform possibly multi-word comparison and conditional jump to LABEL
5824    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5825    now a thin wrapper around do_compare_rtx_and_jump.  */
5826
5827 static void
5828 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5829                  rtx_code_label *label)
5830 {
5831   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5832   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
5833                            NULL, label, -1);
5834 }