gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2014 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "diagnostic-core.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "stor-layout.h"
  30 #include "tm_p.h"
  31 #include "flags.h"
  32 #include "insn-config.h"
  33 #include "expr.h"
  34 #include "optabs.h"
  35 #include "recog.h"
  36 #include "langhooks.h"
  37 #include "predict.h"
  38 #include "basic-block.h"
  39 #include "df.h"
  40 #include "target.h"
  41 #include "expmed.h"
  42
  43 struct target_expmed default_target_expmed;
  44 #if SWITCHABLE_TARGET
  45 struct target_expmed *this_target_expmed = &default_target_expmed;
  46 #endif
  47
  48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  49                                    unsigned HOST_WIDE_INT,
  50                                    unsigned HOST_WIDE_INT,
  51                                    unsigned HOST_WIDE_INT,
  52                                    rtx);
  53 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
  54                                      unsigned HOST_WIDE_INT,
  55                                      rtx);
  56 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  57                                    unsigned HOST_WIDE_INT,
  58                                    unsigned HOST_WIDE_INT,
  59                                    unsigned HOST_WIDE_INT,
  60                                    rtx);
  61 static rtx extract_fixed_bit_field (machine_mode, rtx,
  62                                     unsigned HOST_WIDE_INT,
  63                                     unsigned HOST_WIDE_INT, rtx, int);
  64 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
  65                                       unsigned HOST_WIDE_INT,
  66                                       unsigned HOST_WIDE_INT, rtx, int);
  67 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
  68 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  69                                     unsigned HOST_WIDE_INT, int);
  70 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
  71 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  72 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  73
  74 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  75    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  76    The mask is truncated if necessary to the width of mode MODE.  The
  77    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
  78
  79 static inline rtx
  80 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
  81 {
  82   return immed_wide_int_const
  83     (wi::shifted_mask (bitpos, bitsize, complement,
  84                        GET_MODE_PRECISION (mode)), mode);
  85 }
  86
  87 /* Test whether a value is zero of a power of two.  */
  88 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  89   (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
  90
  91 struct init_expmed_rtl
  92 {
  93   rtx reg;
  94   rtx plus;
  95   rtx neg;
  96   rtx mult;
  97   rtx sdiv;
  98   rtx udiv;
  99   rtx sdiv_32;
 100   rtx smod_32;
 101   rtx wide_mult;
 102   rtx wide_lshr;
 103   rtx wide_trunc;
 104   rtx shift;
 105   rtx shift_mult;
 106   rtx shift_add;
 107   rtx shift_sub0;
 108   rtx shift_sub1;
 109   rtx zext;
 110   rtx trunc;
 111
 112   rtx pow2[MAX_BITS_PER_WORD];
 113   rtx cint[MAX_BITS_PER_WORD];
 114 };
 115
 116 static void
 117 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
 118                       machine_mode from_mode, bool speed)
 119 {
 120   int to_size, from_size;
 121   rtx which;
 122
 123   to_size = GET_MODE_PRECISION (to_mode);
 124   from_size = GET_MODE_PRECISION (from_mode);
 125
 126   /* Most partial integers have a precision less than the "full"
 127      integer it requires for storage.  In case one doesn't, for
 128      comparison purposes here, reduce the bit size by one in that
 129      case.  */
 130   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 131       && exact_log2 (to_size) != -1)
 132     to_size --;
 133   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 134       && exact_log2 (from_size) != -1)
 135     from_size --;
 136
 137   /* Assume cost of zero-extend and sign-extend is the same.  */
 138   which = (to_size < from_size ? all->trunc : all->zext);
 139
 140   PUT_MODE (all->reg, from_mode);
 141   set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
 142 }
 143
 144 static void
 145 init_expmed_one_mode (struct init_expmed_rtl *all,
 146                       machine_mode mode, int speed)
 147 {
 148   int m, n, mode_bitsize;
 149   machine_mode mode_from;
 150
 151   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 152
 153   PUT_MODE (all->reg, mode);
 154   PUT_MODE (all->plus, mode);
 155   PUT_MODE (all->neg, mode);
 156   PUT_MODE (all->mult, mode);
 157   PUT_MODE (all->sdiv, mode);
 158   PUT_MODE (all->udiv, mode);
 159   PUT_MODE (all->sdiv_32, mode);
 160   PUT_MODE (all->smod_32, mode);
 161   PUT_MODE (all->wide_trunc, mode);
 162   PUT_MODE (all->shift, mode);
 163   PUT_MODE (all->shift_mult, mode);
 164   PUT_MODE (all->shift_add, mode);
 165   PUT_MODE (all->shift_sub0, mode);
 166   PUT_MODE (all->shift_sub1, mode);
 167   PUT_MODE (all->zext, mode);
 168   PUT_MODE (all->trunc, mode);
 169
 170   set_add_cost (speed, mode, set_src_cost (all->plus, speed));
 171   set_neg_cost (speed, mode, set_src_cost (all->neg, speed));
 172   set_mul_cost (speed, mode, set_src_cost (all->mult, speed));
 173   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, speed));
 174   set_udiv_cost (speed, mode, set_src_cost (all->udiv, speed));
 175
 176   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, speed)
 177                                      <= 2 * add_cost (speed, mode)));
 178   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, speed)
 179                                      <= 4 * add_cost (speed, mode)));
 180
 181   set_shift_cost (speed, mode, 0, 0);
 182   {
 183     int cost = add_cost (speed, mode);
 184     set_shiftadd_cost (speed, mode, 0, cost);
 185     set_shiftsub0_cost (speed, mode, 0, cost);
 186     set_shiftsub1_cost (speed, mode, 0, cost);
 187   }
 188
 189   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 190   for (m = 1; m < n; m++)
 191     {
 192       XEXP (all->shift, 1) = all->cint[m];
 193       XEXP (all->shift_mult, 1) = all->pow2[m];
 194
 195       set_shift_cost (speed, mode, m, set_src_cost (all->shift, speed));
 196       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, speed));
 197       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, speed));
 198       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, speed));
 199     }
 200
 201   if (SCALAR_INT_MODE_P (mode))
 202     {
 203       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 204            mode_from = (machine_mode)(mode_from + 1))
 205         init_expmed_one_conv (all, mode, mode_from, speed);
 206     }
 207   if (GET_MODE_CLASS (mode) == MODE_INT)
 208     {
 209       machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
 210       if (wider_mode != VOIDmode)
 211         {
 212           PUT_MODE (all->zext, wider_mode);
 213           PUT_MODE (all->wide_mult, wider_mode);
 214           PUT_MODE (all->wide_lshr, wider_mode);
 215           XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
 216
 217           set_mul_widen_cost (speed, wider_mode,
 218                               set_src_cost (all->wide_mult, speed));
 219           set_mul_highpart_cost (speed, mode,
 220                                  set_src_cost (all->wide_trunc, speed));
 221         }
 222     }
 223 }
 224
 225 void
 226 init_expmed (void)
 227 {
 228   struct init_expmed_rtl all;
 229   machine_mode mode = QImode;
 230   int m, speed;
 231
 232   memset (&all, 0, sizeof all);
 233   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 234     {
 235       all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 236       all.cint[m] = GEN_INT (m);
 237     }
 238
 239   /* Avoid using hard regs in ways which may be unsupported.  */
 240   all.reg = gen_rtx_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 241   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 242   all.neg = gen_rtx_NEG (mode, all.reg);
 243   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 244   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 245   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 246   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 247   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 248   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 249   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 250   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 251   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 252   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 253   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 254   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 255   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 256   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 257   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 258
 259   for (speed = 0; speed < 2; speed++)
 260     {
 261       crtl->maybe_hot_insn_p = speed;
 262       set_zero_cost (speed, set_src_cost (const0_rtx, speed));
 263
 264       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 265            mode = (machine_mode)(mode + 1))
 266         init_expmed_one_mode (&all, mode, speed);
 267
 268       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 269         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 270              mode = (machine_mode)(mode + 1))
 271           init_expmed_one_mode (&all, mode, speed);
 272
 273       if (MIN_MODE_VECTOR_INT != VOIDmode)
 274         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 275              mode = (machine_mode)(mode + 1))
 276           init_expmed_one_mode (&all, mode, speed);
 277     }
 278
 279   if (alg_hash_used_p ())
 280     {
 281       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 282       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 283     }
 284   else
 285     set_alg_hash_used_p (true);
 286   default_rtl_profile ();
 287
 288   ggc_free (all.trunc);
 289   ggc_free (all.shift_sub1);
 290   ggc_free (all.shift_sub0);
 291   ggc_free (all.shift_add);
 292   ggc_free (all.shift_mult);
 293   ggc_free (all.shift);
 294   ggc_free (all.wide_trunc);
 295   ggc_free (all.wide_lshr);
 296   ggc_free (all.wide_mult);
 297   ggc_free (all.zext);
 298   ggc_free (all.smod_32);
 299   ggc_free (all.sdiv_32);
 300   ggc_free (all.udiv);
 301   ggc_free (all.sdiv);
 302   ggc_free (all.mult);
 303   ggc_free (all.neg);
 304   ggc_free (all.plus);
 305   ggc_free (all.reg);
 306 }
 307
 308 /* Return an rtx representing minus the value of X.
 309    MODE is the intended mode of the result,
 310    useful if X is a CONST_INT.  */
 311
 312 rtx
 313 negate_rtx (machine_mode mode, rtx x)
 314 {
 315   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 316
 317   if (result == 0)
 318     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 319
 320   return result;
 321 }
 322
 323 /* Adjust bitfield memory MEM so that it points to the first unit of mode
 324    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
 325    If MODE is BLKmode, return a reference to every byte in the bitfield.
 326    Set *NEW_BITNUM to the bit position of the field within the new memory.  */
 327
 328 static rtx
 329 narrow_bit_field_mem (rtx mem, machine_mode mode,
 330                       unsigned HOST_WIDE_INT bitsize,
 331                       unsigned HOST_WIDE_INT bitnum,
 332                       unsigned HOST_WIDE_INT *new_bitnum)
 333 {
 334   if (mode == BLKmode)
 335     {
 336       *new_bitnum = bitnum % BITS_PER_UNIT;
 337       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 338       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 339                             / BITS_PER_UNIT);
 340       return adjust_bitfield_address_size (mem, mode, offset, size);
 341     }
 342   else
 343     {
 344       unsigned int unit = GET_MODE_BITSIZE (mode);
 345       *new_bitnum = bitnum % unit;
 346       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 347       return adjust_bitfield_address (mem, mode, offset);
 348     }
 349 }
 350
 351 /* The caller wants to perform insertion or extraction PATTERN on a
 352    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 353    BITREGION_START and BITREGION_END are as for store_bit_field
 354    and FIELDMODE is the natural mode of the field.
 355
 356    Search for a mode that is compatible with the memory access
 357    restrictions and (where applicable) with a register insertion or
 358    extraction.  Return the new memory on success, storing the adjusted
 359    bit position in *NEW_BITNUM.  Return null otherwise.  */
 360
 361 static rtx
 362 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 363                               rtx op0, HOST_WIDE_INT bitsize,
 364                               HOST_WIDE_INT bitnum,
 365                               unsigned HOST_WIDE_INT bitregion_start,
 366                               unsigned HOST_WIDE_INT bitregion_end,
 367                               machine_mode fieldmode,
 368                               unsigned HOST_WIDE_INT *new_bitnum)
 369 {
 370   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 371                                 bitregion_end, MEM_ALIGN (op0),
 372                                 MEM_VOLATILE_P (op0));
 373   machine_mode best_mode;
 374   if (iter.next_mode (&best_mode))
 375     {
 376       /* We can use a memory in BEST_MODE.  See whether this is true for
 377          any wider modes.  All other things being equal, we prefer to
 378          use the widest mode possible because it tends to expose more
 379          CSE opportunities.  */
 380       if (!iter.prefer_smaller_modes ())
 381         {
 382           /* Limit the search to the mode required by the corresponding
 383              register insertion or extraction instruction, if any.  */
 384           machine_mode limit_mode = word_mode;
 385           extraction_insn insn;
 386           if (get_best_reg_extraction_insn (&insn, pattern,
 387                                             GET_MODE_BITSIZE (best_mode),
 388                                             fieldmode))
 389             limit_mode = insn.field_mode;
 390
 391           machine_mode wider_mode;
 392           while (iter.next_mode (&wider_mode)
 393                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 394             best_mode = wider_mode;
 395         }
 396       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 397                                    new_bitnum);
 398     }
 399   return NULL_RTX;
 400 }
 401
 402 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 403    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 404    offset is then BITNUM / BITS_PER_UNIT.  */
 405
 406 static bool
 407 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
 408                      unsigned HOST_WIDE_INT bitsize,
 409                      machine_mode struct_mode)
 410 {
 411   if (BYTES_BIG_ENDIAN)
 412     return (bitnum % BITS_PER_UNIT == 0
 413             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
 414                 || (bitnum + bitsize) % BITS_PER_WORD == 0));
 415   else
 416     return bitnum % BITS_PER_WORD == 0;
 417 }
 418
 419 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 420    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 421    Return false if the access would touch memory outside the range
 422    BITREGION_START to BITREGION_END for conformance to the C++ memory
 423    model.  */
 424
 425 static bool
 426 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 427                             unsigned HOST_WIDE_INT bitnum,
 428                             machine_mode fieldmode,
 429                             unsigned HOST_WIDE_INT bitregion_start,
 430                             unsigned HOST_WIDE_INT bitregion_end)
 431 {
 432   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 433
 434   /* -fstrict-volatile-bitfields must be enabled and we must have a
 435      volatile MEM.  */
 436   if (!MEM_P (op0)
 437       || !MEM_VOLATILE_P (op0)
 438       || flag_strict_volatile_bitfields <= 0)
 439     return false;
 440
 441   /* Non-integral modes likely only happen with packed structures.
 442      Punt.  */
 443   if (!SCALAR_INT_MODE_P (fieldmode))
 444     return false;
 445
 446   /* The bit size must not be larger than the field mode, and
 447      the field mode must not be larger than a word.  */
 448   if (bitsize > modesize || modesize > BITS_PER_WORD)
 449     return false;
 450
 451   /* Check for cases of unaligned fields that must be split.  */
 452   if (bitnum % BITS_PER_UNIT + bitsize > modesize
 453       || (STRICT_ALIGNMENT
 454           && bitnum % GET_MODE_ALIGNMENT (fieldmode) + bitsize > modesize))
 455     return false;
 456
 457   /* Check for cases where the C++ memory model applies.  */
 458   if (bitregion_end != 0
 459       && (bitnum - bitnum % modesize < bitregion_start
 460           || bitnum - bitnum % modesize + modesize > bitregion_end))
 461     return false;
 462
 463   return true;
 464 }
 465
 466 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 467    bit number BITNUM can be treated as a simple value of mode MODE.  */
 468
 469 static bool
 470 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 471                        unsigned HOST_WIDE_INT bitnum, machine_mode mode)
 472 {
 473   return (MEM_P (op0)
 474           && bitnum % BITS_PER_UNIT == 0
 475           && bitsize == GET_MODE_BITSIZE (mode)
 476           && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
 477               || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
 478                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 479 }
 480 \f
 481 /* Try to use instruction INSV to store VALUE into a field of OP0.
 482    BITSIZE and BITNUM are as for store_bit_field.  */
 483
 484 static bool
 485 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 486                             unsigned HOST_WIDE_INT bitsize,
 487                             unsigned HOST_WIDE_INT bitnum,
 488                             rtx value)
 489 {
 490   struct expand_operand ops[4];
 491   rtx value1;
 492   rtx xop0 = op0;
 493   rtx_insn *last = get_last_insn ();
 494   bool copy_back = false;
 495
 496   machine_mode op_mode = insv->field_mode;
 497   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 498   if (bitsize == 0 || bitsize > unit)
 499     return false;
 500
 501   if (MEM_P (xop0))
 502     /* Get a reference to the first byte of the field.  */
 503     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 504                                  &bitnum);
 505   else
 506     {
 507       /* Convert from counting within OP0 to counting in OP_MODE.  */
 508       if (BYTES_BIG_ENDIAN)
 509         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 510
 511       /* If xop0 is a register, we need it in OP_MODE
 512          to make it acceptable to the format of insv.  */
 513       if (GET_CODE (xop0) == SUBREG)
 514         /* We can't just change the mode, because this might clobber op0,
 515            and we will need the original value of op0 if insv fails.  */
 516         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 517       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 518         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 519     }
 520
 521   /* If the destination is a paradoxical subreg such that we need a
 522      truncate to the inner mode, perform the insertion on a temporary and
 523      truncate the result to the original destination.  Note that we can't
 524      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 525      X) 0)) is (reg:N X).  */
 526   if (GET_CODE (xop0) == SUBREG
 527       && REG_P (SUBREG_REG (xop0))
 528       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 529                                          op_mode))
 530     {
 531       rtx tem = gen_reg_rtx (op_mode);
 532       emit_move_insn (tem, xop0);
 533       xop0 = tem;
 534       copy_back = true;
 535     }
 536
 537   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 538      "backwards" from the size of the unit we are inserting into.
 539      Otherwise, we count bits from the most significant on a
 540      BYTES/BITS_BIG_ENDIAN machine.  */
 541
 542   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 543     bitnum = unit - bitsize - bitnum;
 544
 545   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 546   value1 = value;
 547   if (GET_MODE (value) != op_mode)
 548     {
 549       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 550         {
 551           /* Optimization: Don't bother really extending VALUE
 552              if it has all the bits we will actually use.  However,
 553              if we must narrow it, be sure we do it correctly.  */
 554
 555           if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 556             {
 557               rtx tmp;
 558
 559               tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 560               if (! tmp)
 561                 tmp = simplify_gen_subreg (op_mode,
 562                                            force_reg (GET_MODE (value),
 563                                                       value1),
 564                                            GET_MODE (value), 0);
 565               value1 = tmp;
 566             }
 567           else
 568             value1 = gen_lowpart (op_mode, value1);
 569         }
 570       else if (CONST_INT_P (value))
 571         value1 = gen_int_mode (INTVAL (value), op_mode);
 572       else
 573         /* Parse phase is supposed to make VALUE's data type
 574            match that of the component reference, which is a type
 575            at least as wide as the field; so VALUE should have
 576            a mode that corresponds to that type.  */
 577         gcc_assert (CONSTANT_P (value));
 578     }
 579
 580   create_fixed_operand (&ops[0], xop0);
 581   create_integer_operand (&ops[1], bitsize);
 582   create_integer_operand (&ops[2], bitnum);
 583   create_input_operand (&ops[3], value1, op_mode);
 584   if (maybe_expand_insn (insv->icode, 4, ops))
 585     {
 586       if (copy_back)
 587         convert_move (op0, xop0, true);
 588       return true;
 589     }
 590   delete_insns_since (last);
 591   return false;
 592 }
 593
 594 /* A subroutine of store_bit_field, with the same arguments.  Return true
 595    if the operation could be implemented.
 596
 597    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 598    no other way of implementing the operation.  If FALLBACK_P is false,
 599    return false instead.  */
 600
 601 static bool
 602 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 603                    unsigned HOST_WIDE_INT bitnum,
 604                    unsigned HOST_WIDE_INT bitregion_start,
 605                    unsigned HOST_WIDE_INT bitregion_end,
 606                    machine_mode fieldmode,
 607                    rtx value, bool fallback_p)
 608 {
 609   rtx op0 = str_rtx;
 610   rtx orig_value;
 611
 612   while (GET_CODE (op0) == SUBREG)
 613     {
 614       /* The following line once was done only if WORDS_BIG_ENDIAN,
 615          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 616          meaningful at a much higher level; when structures are copied
 617          between memory and regs, the higher-numbered regs
 618          always get higher addresses.  */
 619       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 620       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 621       int byte_offset = 0;
 622
 623       /* Paradoxical subregs need special handling on big endian machines.  */
 624       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 625         {
 626           int difference = inner_mode_size - outer_mode_size;
 627
 628           if (WORDS_BIG_ENDIAN)
 629             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 630           if (BYTES_BIG_ENDIAN)
 631             byte_offset += difference % UNITS_PER_WORD;
 632         }
 633       else
 634         byte_offset = SUBREG_BYTE (op0);
 635
 636       bitnum += byte_offset * BITS_PER_UNIT;
 637       op0 = SUBREG_REG (op0);
 638     }
 639
 640   /* No action is needed if the target is a register and if the field
 641      lies completely outside that register.  This can occur if the source
 642      code contains an out-of-bounds access to a small array.  */
 643   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 644     return true;
 645
 646   /* Use vec_set patterns for inserting parts of vectors whenever
 647      available.  */
 648   if (VECTOR_MODE_P (GET_MODE (op0))
 649       && !MEM_P (op0)
 650       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 651       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 652       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 653       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 654     {
 655       struct expand_operand ops[3];
 656       machine_mode outermode = GET_MODE (op0);
 657       machine_mode innermode = GET_MODE_INNER (outermode);
 658       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 659       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 660
 661       create_fixed_operand (&ops[0], op0);
 662       create_input_operand (&ops[1], value, innermode);
 663       create_integer_operand (&ops[2], pos);
 664       if (maybe_expand_insn (icode, 3, ops))
 665         return true;
 666     }
 667
 668   /* If the target is a register, overwriting the entire object, or storing
 669      a full-word or multi-word field can be done with just a SUBREG.  */
 670   if (!MEM_P (op0)
 671       && bitsize == GET_MODE_BITSIZE (fieldmode)
 672       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 673           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 674     {
 675       /* Use the subreg machinery either to narrow OP0 to the required
 676          words or to cope with mode punning between equal-sized modes.
 677          In the latter case, use subreg on the rhs side, not lhs.  */
 678       rtx sub;
 679
 680       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 681         {
 682           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 683           if (sub)
 684             {
 685               emit_move_insn (op0, sub);
 686               return true;
 687             }
 688         }
 689       else
 690         {
 691           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 692                                      bitnum / BITS_PER_UNIT);
 693           if (sub)
 694             {
 695               emit_move_insn (sub, value);
 696               return true;
 697             }
 698         }
 699     }
 700
 701   /* If the target is memory, storing any naturally aligned field can be
 702      done with a simple store.  For targets that support fast unaligned
 703      memory, any naturally sized, unit aligned field can be done directly.  */
 704   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
 705     {
 706       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 707       emit_move_insn (op0, value);
 708       return true;
 709     }
 710
 711   /* Make sure we are playing with integral modes.  Pun with subregs
 712      if we aren't.  This must come after the entire register case above,
 713      since that case is valid for any mode.  The following cases are only
 714      valid for integral modes.  */
 715   {
 716     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 717     if (imode != GET_MODE (op0))
 718       {
 719         if (MEM_P (op0))
 720           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
 721         else
 722           {
 723             gcc_assert (imode != BLKmode);
 724             op0 = gen_lowpart (imode, op0);
 725           }
 726       }
 727   }
 728
 729   /* Storing an lsb-aligned field in a register
 730      can be done with a movstrict instruction.  */
 731
 732   if (!MEM_P (op0)
 733       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 734       && bitsize == GET_MODE_BITSIZE (fieldmode)
 735       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 736     {
 737       struct expand_operand ops[2];
 738       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 739       rtx arg0 = op0;
 740       unsigned HOST_WIDE_INT subreg_off;
 741
 742       if (GET_CODE (arg0) == SUBREG)
 743         {
 744           /* Else we've got some float mode source being extracted into
 745              a different float mode destination -- this combination of
 746              subregs results in Severe Tire Damage.  */
 747           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 748                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 749                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 750           arg0 = SUBREG_REG (arg0);
 751         }
 752
 753       subreg_off = bitnum / BITS_PER_UNIT;
 754       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 755         {
 756           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 757
 758           create_fixed_operand (&ops[0], arg0);
 759           /* Shrink the source operand to FIELDMODE.  */
 760           create_convert_operand_to (&ops[1], value, fieldmode, false);
 761           if (maybe_expand_insn (icode, 2, ops))
 762             return true;
 763         }
 764     }
 765
 766   /* Handle fields bigger than a word.  */
 767
 768   if (bitsize > BITS_PER_WORD)
 769     {
 770       /* Here we transfer the words of the field
 771          in the order least significant first.
 772          This is because the most significant word is the one which may
 773          be less than full.
 774          However, only do that if the value is not BLKmode.  */
 775
 776       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 777       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 778       unsigned int i;
 779       rtx_insn *last;
 780
 781       /* This is the mode we must force value to, so that there will be enough
 782          subwords to extract.  Note that fieldmode will often (always?) be
 783          VOIDmode, because that is what store_field uses to indicate that this
 784          is a bit field, but passing VOIDmode to operand_subword_force
 785          is not allowed.  */
 786       fieldmode = GET_MODE (value);
 787       if (fieldmode == VOIDmode)
 788         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 789
 790       last = get_last_insn ();
 791       for (i = 0; i < nwords; i++)
 792         {
 793           /* If I is 0, use the low-order word in both field and target;
 794              if I is 1, use the next to lowest word; and so on.  */
 795           unsigned int wordnum = (backwards
 796                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 797                                   - i - 1
 798                                   : i);
 799           unsigned int bit_offset = (backwards
 800                                      ? MAX ((int) bitsize - ((int) i + 1)
 801                                             * BITS_PER_WORD,
 802                                             0)
 803                                      : (int) i * BITS_PER_WORD);
 804           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 805           unsigned HOST_WIDE_INT new_bitsize =
 806             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 807
 808           /* If the remaining chunk doesn't have full wordsize we have
 809              to make sure that for big endian machines the higher order
 810              bits are used.  */
 811           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 812             value_word = simplify_expand_binop (word_mode, lshr_optab,
 813                                                 value_word,
 814                                                 GEN_INT (BITS_PER_WORD
 815                                                          - new_bitsize),
 816                                                 NULL_RTX, true,
 817                                                 OPTAB_LIB_WIDEN);
 818
 819           if (!store_bit_field_1 (op0, new_bitsize,
 820                                   bitnum + bit_offset,
 821                                   bitregion_start, bitregion_end,
 822                                   word_mode,
 823                                   value_word, fallback_p))
 824             {
 825               delete_insns_since (last);
 826               return false;
 827             }
 828         }
 829       return true;
 830     }
 831
 832   /* If VALUE has a floating-point or complex mode, access it as an
 833      integer of the corresponding size.  This can occur on a machine
 834      with 64 bit registers that uses SFmode for float.  It can also
 835      occur for unaligned float or complex fields.  */
 836   orig_value = value;
 837   if (GET_MODE (value) != VOIDmode
 838       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 839       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 840     {
 841       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 842       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 843     }
 844
 845   /* If OP0 is a multi-word register, narrow it to the affected word.
 846      If the region spans two words, defer to store_split_bit_field.  */
 847   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 848     {
 849       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 850                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 851       gcc_assert (op0);
 852       bitnum %= BITS_PER_WORD;
 853       if (bitnum + bitsize > BITS_PER_WORD)
 854         {
 855           if (!fallback_p)
 856             return false;
 857
 858           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 859                                  bitregion_end, value);
 860           return true;
 861         }
 862     }
 863
 864   /* From here on we can assume that the field to be stored in fits
 865      within a word.  If the destination is a register, it too fits
 866      in a word.  */
 867
 868   extraction_insn insv;
 869   if (!MEM_P (op0)
 870       && get_best_reg_extraction_insn (&insv, EP_insv,
 871                                        GET_MODE_BITSIZE (GET_MODE (op0)),
 872                                        fieldmode)
 873       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 874     return true;
 875
 876   /* If OP0 is a memory, try copying it to a register and seeing if a
 877      cheap register alternative is available.  */
 878   if (MEM_P (op0))
 879     {
 880       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
 881                                         fieldmode)
 882           && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 883         return true;
 884
 885       rtx_insn *last = get_last_insn ();
 886
 887       /* Try loading part of OP0 into a register, inserting the bitfield
 888          into that, and then copying the result back to OP0.  */
 889       unsigned HOST_WIDE_INT bitpos;
 890       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
 891                                                bitregion_start, bitregion_end,
 892                                                fieldmode, &bitpos);
 893       if (xop0)
 894         {
 895           rtx tempreg = copy_to_reg (xop0);
 896           if (store_bit_field_1 (tempreg, bitsize, bitpos,
 897                                  bitregion_start, bitregion_end,
 898                                  fieldmode, orig_value, false))
 899             {
 900               emit_move_insn (xop0, tempreg);
 901               return true;
 902             }
 903           delete_insns_since (last);
 904         }
 905     }
 906
 907   if (!fallback_p)
 908     return false;
 909
 910   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
 911                          bitregion_end, value);
 912   return true;
 913 }
 914
 915 /* Generate code to store value from rtx VALUE
 916    into a bit-field within structure STR_RTX
 917    containing BITSIZE bits starting at bit BITNUM.
 918
 919    BITREGION_START is bitpos of the first bitfield in this region.
 920    BITREGION_END is the bitpos of the ending bitfield in this region.
 921    These two fields are 0, if the C++ memory model does not apply,
 922    or we are not interested in keeping track of bitfield regions.
 923
 924    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */
 925
 926 void
 927 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 928                  unsigned HOST_WIDE_INT bitnum,
 929                  unsigned HOST_WIDE_INT bitregion_start,
 930                  unsigned HOST_WIDE_INT bitregion_end,
 931                  machine_mode fieldmode,
 932                  rtx value)
 933 {
 934   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
 935   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
 936                                   bitregion_start, bitregion_end))
 937     {
 938       /* Storing any naturally aligned field can be done with a simple
 939          store.  For targets that support fast unaligned memory, any
 940          naturally sized, unit aligned field can be done directly.  */
 941       if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, fieldmode))
 942         {
 943           str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
 944                                              bitnum / BITS_PER_UNIT);
 945           emit_move_insn (str_rtx, value);
 946         }
 947       else
 948         {
 949           str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
 950                                           &bitnum);
 951           /* Explicitly override the C/C++ memory model; ignore the
 952              bit range so that we can do the access in the mode mandated
 953              by -fstrict-volatile-bitfields instead.  */
 954           store_fixed_bit_field_1 (str_rtx, bitsize, bitnum, value);
 955         }
 956
 957       return;
 958     }
 959
 960   /* Under the C++0x memory model, we must not touch bits outside the
 961      bit region.  Adjust the address to start at the beginning of the
 962      bit region.  */
 963   if (MEM_P (str_rtx) && bitregion_start > 0)
 964     {
 965       machine_mode bestmode;
 966       HOST_WIDE_INT offset, size;
 967
 968       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
 969
 970       offset = bitregion_start / BITS_PER_UNIT;
 971       bitnum -= bitregion_start;
 972       size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
 973       bitregion_end -= bitregion_start;
 974       bitregion_start = 0;
 975       bestmode = get_best_mode (bitsize, bitnum,
 976                                 bitregion_start, bitregion_end,
 977                                 MEM_ALIGN (str_rtx), VOIDmode,
 978                                 MEM_VOLATILE_P (str_rtx));
 979       str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
 980     }
 981
 982   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
 983                           bitregion_start, bitregion_end,
 984                           fieldmode, value, true))
 985     gcc_unreachable ();
 986 }
 987 \f
 988 /* Use shifts and boolean operations to store VALUE into a bit field of
 989    width BITSIZE in OP0, starting at bit BITNUM.  */
 990
 991 static void
 992 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 993                        unsigned HOST_WIDE_INT bitnum,
 994                        unsigned HOST_WIDE_INT bitregion_start,
 995                        unsigned HOST_WIDE_INT bitregion_end,
 996                        rtx value)
 997 {
 998   /* There is a case not handled here:
 999      a structure with a known alignment of just a halfword
1000      and a field split across two aligned halfwords within the structure.
1001      Or likewise a structure with a known alignment of just a byte
1002      and a field split across two bytes.
1003      Such cases are not supposed to be able to occur.  */
1004
1005   if (MEM_P (op0))
1006     {
1007       machine_mode mode = GET_MODE (op0);
1008       if (GET_MODE_BITSIZE (mode) == 0
1009           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1010         mode = word_mode;
1011       mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1012                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1013
1014       if (mode == VOIDmode)
1015         {
1016           /* The only way this should occur is if the field spans word
1017              boundaries.  */
1018           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1019                                  bitregion_end, value);
1020           return;
1021         }
1022
1023       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1024     }
1025
1026   store_fixed_bit_field_1 (op0, bitsize, bitnum, value);
1027 }
1028
1029 /* Helper function for store_fixed_bit_field, stores
1030    the bit field always using the MODE of OP0.  */
1031
1032 static void
1033 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1034                          unsigned HOST_WIDE_INT bitnum,
1035                          rtx value)
1036 {
1037   machine_mode mode;
1038   rtx temp;
1039   int all_zero = 0;
1040   int all_one = 0;
1041
1042   mode = GET_MODE (op0);
1043   gcc_assert (SCALAR_INT_MODE_P (mode));
1044
1045   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1046      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1047
1048   if (BYTES_BIG_ENDIAN)
1049     /* BITNUM is the distance between our msb
1050        and that of the containing datum.
1051        Convert it to the distance from the lsb.  */
1052     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1053
1054   /* Now BITNUM is always the distance between our lsb
1055      and that of OP0.  */
1056
1057   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1058      we must first convert its mode to MODE.  */
1059
1060   if (CONST_INT_P (value))
1061     {
1062       unsigned HOST_WIDE_INT v = UINTVAL (value);
1063
1064       if (bitsize < HOST_BITS_PER_WIDE_INT)
1065         v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;
1066
1067       if (v == 0)
1068         all_zero = 1;
1069       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1070                 && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
1071                || (bitsize == HOST_BITS_PER_WIDE_INT
1072                    && v == (unsigned HOST_WIDE_INT) -1))
1073         all_one = 1;
1074
1075       value = lshift_value (mode, v, bitnum);
1076     }
1077   else
1078     {
1079       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1080                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1081
1082       if (GET_MODE (value) != mode)
1083         value = convert_to_mode (mode, value, 1);
1084
1085       if (must_and)
1086         value = expand_binop (mode, and_optab, value,
1087                               mask_rtx (mode, 0, bitsize, 0),
1088                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1089       if (bitnum > 0)
1090         value = expand_shift (LSHIFT_EXPR, mode, value,
1091                               bitnum, NULL_RTX, 1);
1092     }
1093
1094   /* Now clear the chosen bits in OP0,
1095      except that if VALUE is -1 we need not bother.  */
1096   /* We keep the intermediates in registers to allow CSE to combine
1097      consecutive bitfield assignments.  */
1098
1099   temp = force_reg (mode, op0);
1100
1101   if (! all_one)
1102     {
1103       temp = expand_binop (mode, and_optab, temp,
1104                            mask_rtx (mode, bitnum, bitsize, 1),
1105                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1106       temp = force_reg (mode, temp);
1107     }
1108
1109   /* Now logical-or VALUE into OP0, unless it is zero.  */
1110
1111   if (! all_zero)
1112     {
1113       temp = expand_binop (mode, ior_optab, temp, value,
1114                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1115       temp = force_reg (mode, temp);
1116     }
1117
1118   if (op0 != temp)
1119     {
1120       op0 = copy_rtx (op0);
1121       emit_move_insn (op0, temp);
1122     }
1123 }
1124 \f
1125 /* Store a bit field that is split across multiple accessible memory objects.
1126
1127    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1128    BITSIZE is the field width; BITPOS the position of its first bit
1129    (within the word).
1130    VALUE is the value to store.
1131
1132    This does not yet handle fields wider than BITS_PER_WORD.  */
1133
1134 static void
1135 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1136                        unsigned HOST_WIDE_INT bitpos,
1137                        unsigned HOST_WIDE_INT bitregion_start,
1138                        unsigned HOST_WIDE_INT bitregion_end,
1139                        rtx value)
1140 {
1141   unsigned int unit;
1142   unsigned int bitsdone = 0;
1143
1144   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1145      much at a time.  */
1146   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1147     unit = BITS_PER_WORD;
1148   else
1149     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1150
1151   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1152      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1153      again, and we will mutually recurse forever.  */
1154   if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1155     unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1156
1157   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1158      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1159      that VALUE might be a floating-point constant.  */
1160   if (CONSTANT_P (value) && !CONST_INT_P (value))
1161     {
1162       rtx word = gen_lowpart_common (word_mode, value);
1163
1164       if (word && (value != word))
1165         value = word;
1166       else
1167         value = gen_lowpart_common (word_mode,
1168                                     force_reg (GET_MODE (value) != VOIDmode
1169                                                ? GET_MODE (value)
1170                                                : word_mode, value));
1171     }
1172
1173   while (bitsdone < bitsize)
1174     {
1175       unsigned HOST_WIDE_INT thissize;
1176       rtx part, word;
1177       unsigned HOST_WIDE_INT thispos;
1178       unsigned HOST_WIDE_INT offset;
1179
1180       offset = (bitpos + bitsdone) / unit;
1181       thispos = (bitpos + bitsdone) % unit;
1182
1183       /* When region of bytes we can touch is restricted, decrease
1184          UNIT close to the end of the region as needed.  If op0 is a REG
1185          or SUBREG of REG, don't do this, as there can't be data races
1186          on a register and we can expand shorter code in some cases.  */
1187       if (bitregion_end
1188           && unit > BITS_PER_UNIT
1189           && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1190           && !REG_P (op0)
1191           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1192         {
1193           unit = unit / 2;
1194           continue;
1195         }
1196
1197       /* THISSIZE must not overrun a word boundary.  Otherwise,
1198          store_fixed_bit_field will call us again, and we will mutually
1199          recurse forever.  */
1200       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1201       thissize = MIN (thissize, unit - thispos);
1202
1203       if (BYTES_BIG_ENDIAN)
1204         {
1205           /* Fetch successively less significant portions.  */
1206           if (CONST_INT_P (value))
1207             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1208                              >> (bitsize - bitsdone - thissize))
1209                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1210           else
1211             {
1212               int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1213               /* The args are chosen so that the last part includes the
1214                  lsb.  Give extract_bit_field the value it needs (with
1215                  endianness compensation) to fetch the piece we want.  */
1216               part = extract_fixed_bit_field (word_mode, value, thissize,
1217                                               total_bits - bitsize + bitsdone,
1218                                               NULL_RTX, 1);
1219             }
1220         }
1221       else
1222         {
1223           /* Fetch successively more significant portions.  */
1224           if (CONST_INT_P (value))
1225             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1226                              >> bitsdone)
1227                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1228           else
1229             part = extract_fixed_bit_field (word_mode, value, thissize,
1230                                             bitsdone, NULL_RTX, 1);
1231         }
1232
1233       /* If OP0 is a register, then handle OFFSET here.
1234
1235          When handling multiword bitfields, extract_bit_field may pass
1236          down a word_mode SUBREG of a larger REG for a bitfield that actually
1237          crosses a word boundary.  Thus, for a SUBREG, we must find
1238          the current word starting from the base register.  */
1239       if (GET_CODE (op0) == SUBREG)
1240         {
1241           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1242                             + (offset * unit / BITS_PER_WORD);
1243           machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1244           if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1245             word = word_offset ? const0_rtx : op0;
1246           else
1247             word = operand_subword_force (SUBREG_REG (op0), word_offset,
1248                                           GET_MODE (SUBREG_REG (op0)));
1249           offset &= BITS_PER_WORD / unit - 1;
1250         }
1251       else if (REG_P (op0))
1252         {
1253           machine_mode op0_mode = GET_MODE (op0);
1254           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1255             word = offset ? const0_rtx : op0;
1256           else
1257             word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1258                                           GET_MODE (op0));
1259           offset &= BITS_PER_WORD / unit - 1;
1260         }
1261       else
1262         word = op0;
1263
1264       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1265          it is just an out-of-bounds access.  Ignore it.  */
1266       if (word != const0_rtx)
1267         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1268                                bitregion_start, bitregion_end, part);
1269       bitsdone += thissize;
1270     }
1271 }
1272 \f
1273 /* A subroutine of extract_bit_field_1 that converts return value X
1274    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1275    to extract_bit_field.  */
1276
1277 static rtx
1278 convert_extracted_bit_field (rtx x, machine_mode mode,
1279                              machine_mode tmode, bool unsignedp)
1280 {
1281   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1282     return x;
1283
1284   /* If the x mode is not a scalar integral, first convert to the
1285      integer mode of that size and then access it as a floating-point
1286      value via a SUBREG.  */
1287   if (!SCALAR_INT_MODE_P (tmode))
1288     {
1289       machine_mode smode;
1290
1291       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1292       x = convert_to_mode (smode, x, unsignedp);
1293       x = force_reg (smode, x);
1294       return gen_lowpart (tmode, x);
1295     }
1296
1297   return convert_to_mode (tmode, x, unsignedp);
1298 }
1299
1300 /* Try to use an ext(z)v pattern to extract a field from OP0.
1301    Return the extracted value on success, otherwise return null.
1302    EXT_MODE is the mode of the extraction and the other arguments
1303    are as for extract_bit_field.  */
1304
1305 static rtx
1306 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1307                               unsigned HOST_WIDE_INT bitsize,
1308                               unsigned HOST_WIDE_INT bitnum,
1309                               int unsignedp, rtx target,
1310                               machine_mode mode, machine_mode tmode)
1311 {
1312   struct expand_operand ops[4];
1313   rtx spec_target = target;
1314   rtx spec_target_subreg = 0;
1315   machine_mode ext_mode = extv->field_mode;
1316   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1317
1318   if (bitsize == 0 || unit < bitsize)
1319     return NULL_RTX;
1320
1321   if (MEM_P (op0))
1322     /* Get a reference to the first byte of the field.  */
1323     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1324                                 &bitnum);
1325   else
1326     {
1327       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1328       if (BYTES_BIG_ENDIAN)
1329         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1330
1331       /* If op0 is a register, we need it in EXT_MODE to make it
1332          acceptable to the format of ext(z)v.  */
1333       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1334         return NULL_RTX;
1335       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1336         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1337     }
1338
1339   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1340      "backwards" from the size of the unit we are extracting from.
1341      Otherwise, we count bits from the most significant on a
1342      BYTES/BITS_BIG_ENDIAN machine.  */
1343
1344   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1345     bitnum = unit - bitsize - bitnum;
1346
1347   if (target == 0)
1348     target = spec_target = gen_reg_rtx (tmode);
1349
1350   if (GET_MODE (target) != ext_mode)
1351     {
1352       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1353          between the mode of the extraction (word_mode) and the target
1354          mode.  Instead, create a temporary and use convert_move to set
1355          the target.  */
1356       if (REG_P (target)
1357           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1358         {
1359           target = gen_lowpart (ext_mode, target);
1360           if (GET_MODE_PRECISION (ext_mode)
1361               > GET_MODE_PRECISION (GET_MODE (spec_target)))
1362             spec_target_subreg = target;
1363         }
1364       else
1365         target = gen_reg_rtx (ext_mode);
1366     }
1367
1368   create_output_operand (&ops[0], target, ext_mode);
1369   create_fixed_operand (&ops[1], op0);
1370   create_integer_operand (&ops[2], bitsize);
1371   create_integer_operand (&ops[3], bitnum);
1372   if (maybe_expand_insn (extv->icode, 4, ops))
1373     {
1374       target = ops[0].value;
1375       if (target == spec_target)
1376         return target;
1377       if (target == spec_target_subreg)
1378         return spec_target;
1379       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1380     }
1381   return NULL_RTX;
1382 }
1383
1384 /* A subroutine of extract_bit_field, with the same arguments.
1385    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1386    if we can find no other means of implementing the operation.
1387    if FALLBACK_P is false, return NULL instead.  */
1388
1389 static rtx
1390 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1391                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1392                      machine_mode mode, machine_mode tmode,
1393                      bool fallback_p)
1394 {
1395   rtx op0 = str_rtx;
1396   machine_mode int_mode;
1397   machine_mode mode1;
1398
1399   if (tmode == VOIDmode)
1400     tmode = mode;
1401
1402   while (GET_CODE (op0) == SUBREG)
1403     {
1404       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1405       op0 = SUBREG_REG (op0);
1406     }
1407
1408   /* If we have an out-of-bounds access to a register, just return an
1409      uninitialized register of the required mode.  This can occur if the
1410      source code contains an out-of-bounds access to a small array.  */
1411   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1412     return gen_reg_rtx (tmode);
1413
1414   if (REG_P (op0)
1415       && mode == GET_MODE (op0)
1416       && bitnum == 0
1417       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1418     {
1419       /* We're trying to extract a full register from itself.  */
1420       return op0;
1421     }
1422
1423   /* See if we can get a better vector mode before extracting.  */
1424   if (VECTOR_MODE_P (GET_MODE (op0))
1425       && !MEM_P (op0)
1426       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1427     {
1428       machine_mode new_mode;
1429
1430       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1431         new_mode = MIN_MODE_VECTOR_FLOAT;
1432       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1433         new_mode = MIN_MODE_VECTOR_FRACT;
1434       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1435         new_mode = MIN_MODE_VECTOR_UFRACT;
1436       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1437         new_mode = MIN_MODE_VECTOR_ACCUM;
1438       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1439         new_mode = MIN_MODE_VECTOR_UACCUM;
1440       else
1441         new_mode = MIN_MODE_VECTOR_INT;
1442
1443       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1444         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1445             && targetm.vector_mode_supported_p (new_mode))
1446           break;
1447       if (new_mode != VOIDmode)
1448         op0 = gen_lowpart (new_mode, op0);
1449     }
1450
1451   /* Use vec_extract patterns for extracting parts of vectors whenever
1452      available.  */
1453   if (VECTOR_MODE_P (GET_MODE (op0))
1454       && !MEM_P (op0)
1455       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1456       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1457           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1458     {
1459       struct expand_operand ops[3];
1460       machine_mode outermode = GET_MODE (op0);
1461       machine_mode innermode = GET_MODE_INNER (outermode);
1462       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1463       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1464
1465       create_output_operand (&ops[0], target, innermode);
1466       create_input_operand (&ops[1], op0, outermode);
1467       create_integer_operand (&ops[2], pos);
1468       if (maybe_expand_insn (icode, 3, ops))
1469         {
1470           target = ops[0].value;
1471           if (GET_MODE (target) != mode)
1472             return gen_lowpart (tmode, target);
1473           return target;
1474         }
1475     }
1476
1477   /* Make sure we are playing with integral modes.  Pun with subregs
1478      if we aren't.  */
1479   {
1480     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1481     if (imode != GET_MODE (op0))
1482       {
1483         if (MEM_P (op0))
1484           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1485         else if (imode != BLKmode)
1486           {
1487             op0 = gen_lowpart (imode, op0);
1488
1489             /* If we got a SUBREG, force it into a register since we
1490                aren't going to be able to do another SUBREG on it.  */
1491             if (GET_CODE (op0) == SUBREG)
1492               op0 = force_reg (imode, op0);
1493           }
1494         else if (REG_P (op0))
1495           {
1496             rtx reg, subreg;
1497             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1498                                             MODE_INT);
1499             reg = gen_reg_rtx (imode);
1500             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1501             emit_move_insn (subreg, op0);
1502             op0 = reg;
1503             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1504           }
1505         else
1506           {
1507             HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1508             rtx mem = assign_stack_temp (GET_MODE (op0), size);
1509             emit_move_insn (mem, op0);
1510             op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1511           }
1512       }
1513   }
1514
1515   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1516      If that's wrong, the solution is to test for it and set TARGET to 0
1517      if needed.  */
1518
1519   /* Get the mode of the field to use for atomic access or subreg
1520      conversion.  */
1521   mode1 = mode;
1522   if (SCALAR_INT_MODE_P (tmode))
1523     {
1524       machine_mode try_mode = mode_for_size (bitsize,
1525                                                   GET_MODE_CLASS (tmode), 0);
1526       if (try_mode != BLKmode)
1527         mode1 = try_mode;
1528     }
1529   gcc_assert (mode1 != BLKmode);
1530
1531   /* Extraction of a full MODE1 value can be done with a subreg as long
1532      as the least significant bit of the value is the least significant
1533      bit of either OP0 or a word of OP0.  */
1534   if (!MEM_P (op0)
1535       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1536       && bitsize == GET_MODE_BITSIZE (mode1)
1537       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1538     {
1539       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1540                                      bitnum / BITS_PER_UNIT);
1541       if (sub)
1542         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1543     }
1544
1545   /* Extraction of a full MODE1 value can be done with a load as long as
1546      the field is on a byte boundary and is sufficiently aligned.  */
1547   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1548     {
1549       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1550       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1551     }
1552
1553   /* Handle fields bigger than a word.  */
1554
1555   if (bitsize > BITS_PER_WORD)
1556     {
1557       /* Here we transfer the words of the field
1558          in the order least significant first.
1559          This is because the most significant word is the one which may
1560          be less than full.  */
1561
1562       unsigned int backwards = WORDS_BIG_ENDIAN;
1563       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1564       unsigned int i;
1565       rtx_insn *last;
1566
1567       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1568         target = gen_reg_rtx (mode);
1569
1570       /* Indicate for flow that the entire target reg is being set.  */
1571       emit_clobber (target);
1572
1573       last = get_last_insn ();
1574       for (i = 0; i < nwords; i++)
1575         {
1576           /* If I is 0, use the low-order word in both field and target;
1577              if I is 1, use the next to lowest word; and so on.  */
1578           /* Word number in TARGET to use.  */
1579           unsigned int wordnum
1580             = (backwards
1581                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1582                : i);
1583           /* Offset from start of field in OP0.  */
1584           unsigned int bit_offset = (backwards
1585                                      ? MAX ((int) bitsize - ((int) i + 1)
1586                                             * BITS_PER_WORD,
1587                                             0)
1588                                      : (int) i * BITS_PER_WORD);
1589           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1590           rtx result_part
1591             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1592                                              bitsize - i * BITS_PER_WORD),
1593                                    bitnum + bit_offset, 1, target_part,
1594                                    mode, word_mode, fallback_p);
1595
1596           gcc_assert (target_part);
1597           if (!result_part)
1598             {
1599               delete_insns_since (last);
1600               return NULL;
1601             }
1602
1603           if (result_part != target_part)
1604             emit_move_insn (target_part, result_part);
1605         }
1606
1607       if (unsignedp)
1608         {
1609           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1610              need to be zero'd out.  */
1611           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1612             {
1613               unsigned int i, total_words;
1614
1615               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1616               for (i = nwords; i < total_words; i++)
1617                 emit_move_insn
1618                   (operand_subword (target,
1619                                     backwards ? total_words - i - 1 : i,
1620                                     1, VOIDmode),
1621                    const0_rtx);
1622             }
1623           return target;
1624         }
1625
1626       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1627       target = expand_shift (LSHIFT_EXPR, mode, target,
1628                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1629       return expand_shift (RSHIFT_EXPR, mode, target,
1630                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1631     }
1632
1633   /* If OP0 is a multi-word register, narrow it to the affected word.
1634      If the region spans two words, defer to extract_split_bit_field.  */
1635   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1636     {
1637       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1638                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1639       bitnum %= BITS_PER_WORD;
1640       if (bitnum + bitsize > BITS_PER_WORD)
1641         {
1642           if (!fallback_p)
1643             return NULL_RTX;
1644           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1645           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1646         }
1647     }
1648
1649   /* From here on we know the desired field is smaller than a word.
1650      If OP0 is a register, it too fits within a word.  */
1651   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1652   extraction_insn extv;
1653   if (!MEM_P (op0)
1654       /* ??? We could limit the structure size to the part of OP0 that
1655          contains the field, with appropriate checks for endianness
1656          and TRULY_NOOP_TRUNCATION.  */
1657       && get_best_reg_extraction_insn (&extv, pattern,
1658                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1659                                        tmode))
1660     {
1661       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1662                                                  unsignedp, target, mode,
1663                                                  tmode);
1664       if (result)
1665         return result;
1666     }
1667
1668   /* If OP0 is a memory, try copying it to a register and seeing if a
1669      cheap register alternative is available.  */
1670   if (MEM_P (op0))
1671     {
1672       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1673                                         tmode))
1674         {
1675           rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1676                                                      bitnum, unsignedp,
1677                                                      target, mode,
1678                                                      tmode);
1679           if (result)
1680             return result;
1681         }
1682
1683       rtx_insn *last = get_last_insn ();
1684
1685       /* Try loading part of OP0 into a register and extracting the
1686          bitfield from that.  */
1687       unsigned HOST_WIDE_INT bitpos;
1688       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1689                                                0, 0, tmode, &bitpos);
1690       if (xop0)
1691         {
1692           xop0 = copy_to_reg (xop0);
1693           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1694                                             unsignedp, target,
1695                                             mode, tmode, false);
1696           if (result)
1697             return result;
1698           delete_insns_since (last);
1699         }
1700     }
1701
1702   if (!fallback_p)
1703     return NULL;
1704
1705   /* Find a correspondingly-sized integer field, so we can apply
1706      shifts and masks to it.  */
1707   int_mode = int_mode_for_mode (tmode);
1708   if (int_mode == BLKmode)
1709     int_mode = int_mode_for_mode (mode);
1710   /* Should probably push op0 out to memory and then do a load.  */
1711   gcc_assert (int_mode != BLKmode);
1712
1713   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1714                                     target, unsignedp);
1715   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1716 }
1717
1718 /* Generate code to extract a byte-field from STR_RTX
1719    containing BITSIZE bits, starting at BITNUM,
1720    and put it in TARGET if possible (if TARGET is nonzero).
1721    Regardless of TARGET, we return the rtx for where the value is placed.
1722
1723    STR_RTX is the structure containing the byte (a REG or MEM).
1724    UNSIGNEDP is nonzero if this is an unsigned bit field.
1725    MODE is the natural mode of the field value once extracted.
1726    TMODE is the mode the caller would like the value to have;
1727    but the value may be returned with type MODE instead.
1728
1729    If a TARGET is specified and we can store in it at no extra cost,
1730    we do so, and return TARGET.
1731    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1732    if they are equally easy.  */
1733
1734 rtx
1735 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1736                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1737                    machine_mode mode, machine_mode tmode)
1738 {
1739   machine_mode mode1;
1740
1741   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1742   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1743     mode1 = GET_MODE (str_rtx);
1744   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1745     mode1 = GET_MODE (target);
1746   else
1747     mode1 = tmode;
1748
1749   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1750     {
1751       rtx result;
1752
1753       /* Extraction of a full MODE1 value can be done with a load as long as
1754          the field is on a byte boundary and is sufficiently aligned.  */
1755       if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, mode1))
1756         result = adjust_bitfield_address (str_rtx, mode1,
1757                                           bitnum / BITS_PER_UNIT);
1758       else
1759         {
1760           str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1761                                           &bitnum);
1762           result = extract_fixed_bit_field_1 (mode, str_rtx, bitsize, bitnum,
1763                                               target, unsignedp);
1764         }
1765
1766       return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1767     }
1768
1769   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1770                               target, mode, tmode, true);
1771 }
1772 \f
1773 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1774    from bit BITNUM of OP0.
1775
1776    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1777    If TARGET is nonzero, attempts to store the value there
1778    and return TARGET, but this is not guaranteed.
1779    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1780
1781 static rtx
1782 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1783                          unsigned HOST_WIDE_INT bitsize,
1784                          unsigned HOST_WIDE_INT bitnum, rtx target,
1785                          int unsignedp)
1786 {
1787   if (MEM_P (op0))
1788     {
1789       machine_mode mode
1790         = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1791                          MEM_VOLATILE_P (op0));
1792
1793       if (mode == VOIDmode)
1794         /* The only way this should occur is if the field spans word
1795            boundaries.  */
1796         return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1797
1798       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1799     }
1800
1801   return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1802                                     target, unsignedp);
1803 }
1804
1805 /* Helper function for extract_fixed_bit_field, extracts
1806    the bit field always using the MODE of OP0.  */
1807
1808 static rtx
1809 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1810                            unsigned HOST_WIDE_INT bitsize,
1811                            unsigned HOST_WIDE_INT bitnum, rtx target,
1812                            int unsignedp)
1813 {
1814   machine_mode mode = GET_MODE (op0);
1815   gcc_assert (SCALAR_INT_MODE_P (mode));
1816
1817   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1818      for invalid input, such as extract equivalent of f5 from
1819      gcc.dg/pr48335-2.c.  */
1820
1821   if (BYTES_BIG_ENDIAN)
1822     /* BITNUM is the distance between our msb and that of OP0.
1823        Convert it to the distance from the lsb.  */
1824     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1825
1826   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1827      We have reduced the big-endian case to the little-endian case.  */
1828
1829   if (unsignedp)
1830     {
1831       if (bitnum)
1832         {
1833           /* If the field does not already start at the lsb,
1834              shift it so it does.  */
1835           /* Maybe propagate the target for the shift.  */
1836           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1837           if (tmode != mode)
1838             subtarget = 0;
1839           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1840         }
1841       /* Convert the value to the desired mode.  */
1842       if (mode != tmode)
1843         op0 = convert_to_mode (tmode, op0, 1);
1844
1845       /* Unless the msb of the field used to be the msb when we shifted,
1846          mask out the upper bits.  */
1847
1848       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1849         return expand_binop (GET_MODE (op0), and_optab, op0,
1850                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1851                              target, 1, OPTAB_LIB_WIDEN);
1852       return op0;
1853     }
1854
1855   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1856      then arithmetic-shift its lsb to the lsb of the word.  */
1857   op0 = force_reg (mode, op0);
1858
1859   /* Find the narrowest integer mode that contains the field.  */
1860
1861   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1862        mode = GET_MODE_WIDER_MODE (mode))
1863     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1864       {
1865         op0 = convert_to_mode (mode, op0, 0);
1866         break;
1867       }
1868
1869   if (mode != tmode)
1870     target = 0;
1871
1872   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1873     {
1874       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1875       /* Maybe propagate the target for the shift.  */
1876       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1877       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1878     }
1879
1880   return expand_shift (RSHIFT_EXPR, mode, op0,
1881                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1882 }
1883
1884 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1885    VALUE << BITPOS.  */
1886
1887 static rtx
1888 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
1889               int bitpos)
1890 {
1891   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
1892 }
1893 \f
1894 /* Extract a bit field that is split across two words
1895    and return an RTX for the result.
1896
1897    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1898    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1899    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1900
1901 static rtx
1902 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1903                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1904 {
1905   unsigned int unit;
1906   unsigned int bitsdone = 0;
1907   rtx result = NULL_RTX;
1908   int first = 1;
1909
1910   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1911      much at a time.  */
1912   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1913     unit = BITS_PER_WORD;
1914   else
1915     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1916
1917   while (bitsdone < bitsize)
1918     {
1919       unsigned HOST_WIDE_INT thissize;
1920       rtx part, word;
1921       unsigned HOST_WIDE_INT thispos;
1922       unsigned HOST_WIDE_INT offset;
1923
1924       offset = (bitpos + bitsdone) / unit;
1925       thispos = (bitpos + bitsdone) % unit;
1926
1927       /* THISSIZE must not overrun a word boundary.  Otherwise,
1928          extract_fixed_bit_field will call us again, and we will mutually
1929          recurse forever.  */
1930       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1931       thissize = MIN (thissize, unit - thispos);
1932
1933       /* If OP0 is a register, then handle OFFSET here.
1934
1935          When handling multiword bitfields, extract_bit_field may pass
1936          down a word_mode SUBREG of a larger REG for a bitfield that actually
1937          crosses a word boundary.  Thus, for a SUBREG, we must find
1938          the current word starting from the base register.  */
1939       if (GET_CODE (op0) == SUBREG)
1940         {
1941           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1942           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1943                                         GET_MODE (SUBREG_REG (op0)));
1944           offset = 0;
1945         }
1946       else if (REG_P (op0))
1947         {
1948           word = operand_subword_force (op0, offset, GET_MODE (op0));
1949           offset = 0;
1950         }
1951       else
1952         word = op0;
1953
1954       /* Extract the parts in bit-counting order,
1955          whose meaning is determined by BYTES_PER_UNIT.
1956          OFFSET is in UNITs, and UNIT is in bits.  */
1957       part = extract_fixed_bit_field (word_mode, word, thissize,
1958                                       offset * unit + thispos, 0, 1);
1959       bitsdone += thissize;
1960
1961       /* Shift this part into place for the result.  */
1962       if (BYTES_BIG_ENDIAN)
1963         {
1964           if (bitsize != bitsdone)
1965             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1966                                  bitsize - bitsdone, 0, 1);
1967         }
1968       else
1969         {
1970           if (bitsdone != thissize)
1971             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1972                                  bitsdone - thissize, 0, 1);
1973         }
1974
1975       if (first)
1976         result = part;
1977       else
1978         /* Combine the parts with bitwise or.  This works
1979            because we extracted each part as an unsigned bit field.  */
1980         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1981                                OPTAB_LIB_WIDEN);
1982
1983       first = 0;
1984     }
1985
1986   /* Unsigned bit field: we are done.  */
1987   if (unsignedp)
1988     return result;
1989   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1990   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1991                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
1992   return expand_shift (RSHIFT_EXPR, word_mode, result,
1993                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
1994 }
1995 \f
1996 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1997    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
1998    MODE, fill the upper bits with zeros.  Fail if the layout of either
1999    mode is unknown (as for CC modes) or if the extraction would involve
2000    unprofitable mode punning.  Return the value on success, otherwise
2001    return null.
2002
2003    This is different from gen_lowpart* in these respects:
2004
2005      - the returned value must always be considered an rvalue
2006
2007      - when MODE is wider than SRC_MODE, the extraction involves
2008        a zero extension
2009
2010      - when MODE is smaller than SRC_MODE, the extraction involves
2011        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2012
2013    In other words, this routine performs a computation, whereas the
2014    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2015    operations.  */
2016
2017 rtx
2018 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2019 {
2020   machine_mode int_mode, src_int_mode;
2021
2022   if (mode == src_mode)
2023     return src;
2024
2025   if (CONSTANT_P (src))
2026     {
2027       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2028          fails, it will happily create (subreg (symbol_ref)) or similar
2029          invalid SUBREGs.  */
2030       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2031       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2032       if (ret)
2033         return ret;
2034
2035       if (GET_MODE (src) == VOIDmode
2036           || !validate_subreg (mode, src_mode, src, byte))
2037         return NULL_RTX;
2038
2039       src = force_reg (GET_MODE (src), src);
2040       return gen_rtx_SUBREG (mode, src, byte);
2041     }
2042
2043   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2044     return NULL_RTX;
2045
2046   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2047       && MODES_TIEABLE_P (mode, src_mode))
2048     {
2049       rtx x = gen_lowpart_common (mode, src);
2050       if (x)
2051         return x;
2052     }
2053
2054   src_int_mode = int_mode_for_mode (src_mode);
2055   int_mode = int_mode_for_mode (mode);
2056   if (src_int_mode == BLKmode || int_mode == BLKmode)
2057     return NULL_RTX;
2058
2059   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2060     return NULL_RTX;
2061   if (!MODES_TIEABLE_P (int_mode, mode))
2062     return NULL_RTX;
2063
2064   src = gen_lowpart (src_int_mode, src);
2065   src = convert_modes (int_mode, src_int_mode, src, true);
2066   src = gen_lowpart (mode, src);
2067   return src;
2068 }
2069 \f
2070 /* Add INC into TARGET.  */
2071
2072 void
2073 expand_inc (rtx target, rtx inc)
2074 {
2075   rtx value = expand_binop (GET_MODE (target), add_optab,
2076                             target, inc,
2077                             target, 0, OPTAB_LIB_WIDEN);
2078   if (value != target)
2079     emit_move_insn (target, value);
2080 }
2081
2082 /* Subtract DEC from TARGET.  */
2083
2084 void
2085 expand_dec (rtx target, rtx dec)
2086 {
2087   rtx value = expand_binop (GET_MODE (target), sub_optab,
2088                             target, dec,
2089                             target, 0, OPTAB_LIB_WIDEN);
2090   if (value != target)
2091     emit_move_insn (target, value);
2092 }
2093 \f
2094 /* Output a shift instruction for expression code CODE,
2095    with SHIFTED being the rtx for the value to shift,
2096    and AMOUNT the rtx for the amount to shift by.
2097    Store the result in the rtx TARGET, if that is convenient.
2098    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2099    Return the rtx for where the value is.  */
2100
2101 static rtx
2102 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2103                 rtx amount, rtx target, int unsignedp)
2104 {
2105   rtx op1, temp = 0;
2106   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2107   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2108   optab lshift_optab = ashl_optab;
2109   optab rshift_arith_optab = ashr_optab;
2110   optab rshift_uns_optab = lshr_optab;
2111   optab lrotate_optab = rotl_optab;
2112   optab rrotate_optab = rotr_optab;
2113   machine_mode op1_mode;
2114   machine_mode scalar_mode = mode;
2115   int attempt;
2116   bool speed = optimize_insn_for_speed_p ();
2117
2118   if (VECTOR_MODE_P (mode))
2119     scalar_mode = GET_MODE_INNER (mode);
2120   op1 = amount;
2121   op1_mode = GET_MODE (op1);
2122
2123   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2124      shift amount is a vector, use the vector/vector shift patterns.  */
2125   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2126     {
2127       lshift_optab = vashl_optab;
2128       rshift_arith_optab = vashr_optab;
2129       rshift_uns_optab = vlshr_optab;
2130       lrotate_optab = vrotl_optab;
2131       rrotate_optab = vrotr_optab;
2132     }
2133
2134   /* Previously detected shift-counts computed by NEGATE_EXPR
2135      and shifted in the other direction; but that does not work
2136      on all machines.  */
2137
2138   if (SHIFT_COUNT_TRUNCATED)
2139     {
2140       if (CONST_INT_P (op1)
2141           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2142               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2143         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2144                        % GET_MODE_BITSIZE (scalar_mode));
2145       else if (GET_CODE (op1) == SUBREG
2146                && subreg_lowpart_p (op1)
2147                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2148                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2149         op1 = SUBREG_REG (op1);
2150     }
2151
2152   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2153      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2154      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2155      amount instead.  */
2156   if (rotate
2157       && CONST_INT_P (op1)
2158       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2159                    GET_MODE_BITSIZE (scalar_mode) - 1))
2160     {
2161       op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2162       left = !left;
2163       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2164     }
2165
2166   if (op1 == const0_rtx)
2167     return shifted;
2168
2169   /* Check whether its cheaper to implement a left shift by a constant
2170      bit count by a sequence of additions.  */
2171   if (code == LSHIFT_EXPR
2172       && CONST_INT_P (op1)
2173       && INTVAL (op1) > 0
2174       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2175       && INTVAL (op1) < MAX_BITS_PER_WORD
2176       && (shift_cost (speed, mode, INTVAL (op1))
2177           > INTVAL (op1) * add_cost (speed, mode))
2178       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2179     {
2180       int i;
2181       for (i = 0; i < INTVAL (op1); i++)
2182         {
2183           temp = force_reg (mode, shifted);
2184           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2185                                   unsignedp, OPTAB_LIB_WIDEN);
2186         }
2187       return shifted;
2188     }
2189
2190   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2191     {
2192       enum optab_methods methods;
2193
2194       if (attempt == 0)
2195         methods = OPTAB_DIRECT;
2196       else if (attempt == 1)
2197         methods = OPTAB_WIDEN;
2198       else
2199         methods = OPTAB_LIB_WIDEN;
2200
2201       if (rotate)
2202         {
2203           /* Widening does not work for rotation.  */
2204           if (methods == OPTAB_WIDEN)
2205             continue;
2206           else if (methods == OPTAB_LIB_WIDEN)
2207             {
2208               /* If we have been unable to open-code this by a rotation,
2209                  do it as the IOR of two shifts.  I.e., to rotate A
2210                  by N bits, compute
2211                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2212                  where C is the bitsize of A.
2213
2214                  It is theoretically possible that the target machine might
2215                  not be able to perform either shift and hence we would
2216                  be making two libcalls rather than just the one for the
2217                  shift (similarly if IOR could not be done).  We will allow
2218                  this extremely unlikely lossage to avoid complicating the
2219                  code below.  */
2220
2221               rtx subtarget = target == shifted ? 0 : target;
2222               rtx new_amount, other_amount;
2223               rtx temp1;
2224
2225               new_amount = op1;
2226               if (op1 == const0_rtx)
2227                 return shifted;
2228               else if (CONST_INT_P (op1))
2229                 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2230                                         - INTVAL (op1));
2231               else
2232                 {
2233                   other_amount
2234                     = simplify_gen_unary (NEG, GET_MODE (op1),
2235                                           op1, GET_MODE (op1));
2236                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2237                   other_amount
2238                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2239                                            gen_int_mode (mask, GET_MODE (op1)));
2240                 }
2241
2242               shifted = force_reg (mode, shifted);
2243
2244               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2245                                      mode, shifted, new_amount, 0, 1);
2246               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2247                                       mode, shifted, other_amount,
2248                                       subtarget, 1);
2249               return expand_binop (mode, ior_optab, temp, temp1, target,
2250                                    unsignedp, methods);
2251             }
2252
2253           temp = expand_binop (mode,
2254                                left ? lrotate_optab : rrotate_optab,
2255                                shifted, op1, target, unsignedp, methods);
2256         }
2257       else if (unsignedp)
2258         temp = expand_binop (mode,
2259                              left ? lshift_optab : rshift_uns_optab,
2260                              shifted, op1, target, unsignedp, methods);
2261
2262       /* Do arithmetic shifts.
2263          Also, if we are going to widen the operand, we can just as well
2264          use an arithmetic right-shift instead of a logical one.  */
2265       if (temp == 0 && ! rotate
2266           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2267         {
2268           enum optab_methods methods1 = methods;
2269
2270           /* If trying to widen a log shift to an arithmetic shift,
2271              don't accept an arithmetic shift of the same size.  */
2272           if (unsignedp)
2273             methods1 = OPTAB_MUST_WIDEN;
2274
2275           /* Arithmetic shift */
2276
2277           temp = expand_binop (mode,
2278                                left ? lshift_optab : rshift_arith_optab,
2279                                shifted, op1, target, unsignedp, methods1);
2280         }
2281
2282       /* We used to try extzv here for logical right shifts, but that was
2283          only useful for one machine, the VAX, and caused poor code
2284          generation there for lshrdi3, so the code was deleted and a
2285          define_expand for lshrsi3 was added to vax.md.  */
2286     }
2287
2288   gcc_assert (temp);
2289   return temp;
2290 }
2291
2292 /* Output a shift instruction for expression code CODE,
2293    with SHIFTED being the rtx for the value to shift,
2294    and AMOUNT the amount to shift by.
2295    Store the result in the rtx TARGET, if that is convenient.
2296    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2297    Return the rtx for where the value is.  */
2298
2299 rtx
2300 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2301               int amount, rtx target, int unsignedp)
2302 {
2303   return expand_shift_1 (code, mode,
2304                          shifted, GEN_INT (amount), target, unsignedp);
2305 }
2306
2307 /* Output a shift instruction for expression code CODE,
2308    with SHIFTED being the rtx for the value to shift,
2309    and AMOUNT the tree for the amount to shift by.
2310    Store the result in the rtx TARGET, if that is convenient.
2311    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2312    Return the rtx for where the value is.  */
2313
2314 rtx
2315 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2316                        tree amount, rtx target, int unsignedp)
2317 {
2318   return expand_shift_1 (code, mode,
2319                          shifted, expand_normal (amount), target, unsignedp);
2320 }
2321
2322 \f
2323 /* Indicates the type of fixup needed after a constant multiplication.
2324    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2325    the result should be negated, and ADD_VARIANT means that the
2326    multiplicand should be added to the result.  */
2327 enum mult_variant {basic_variant, negate_variant, add_variant};
2328
2329 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2330                         const struct mult_cost *, machine_mode mode);
2331 static bool choose_mult_variant (machine_mode, HOST_WIDE_INT,
2332                                  struct algorithm *, enum mult_variant *, int);
2333 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2334                               const struct algorithm *, enum mult_variant);
2335 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2336 static rtx extract_high_half (machine_mode, rtx);
2337 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2338 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2339                                        int, int);
2340 /* Compute and return the best algorithm for multiplying by T.
2341    The algorithm must cost less than cost_limit
2342    If retval.cost >= COST_LIMIT, no algorithm was found and all
2343    other field of the returned struct are undefined.
2344    MODE is the machine mode of the multiplication.  */
2345
2346 static void
2347 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2348             const struct mult_cost *cost_limit, machine_mode mode)
2349 {
2350   int m;
2351   struct algorithm *alg_in, *best_alg;
2352   struct mult_cost best_cost;
2353   struct mult_cost new_limit;
2354   int op_cost, op_latency;
2355   unsigned HOST_WIDE_INT orig_t = t;
2356   unsigned HOST_WIDE_INT q;
2357   int maxm, hash_index;
2358   bool cache_hit = false;
2359   enum alg_code cache_alg = alg_zero;
2360   bool speed = optimize_insn_for_speed_p ();
2361   machine_mode imode;
2362   struct alg_hash_entry *entry_ptr;
2363
2364   /* Indicate that no algorithm is yet found.  If no algorithm
2365      is found, this value will be returned and indicate failure.  */
2366   alg_out->cost.cost = cost_limit->cost + 1;
2367   alg_out->cost.latency = cost_limit->latency + 1;
2368
2369   if (cost_limit->cost < 0
2370       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2371     return;
2372
2373   /* Be prepared for vector modes.  */
2374   imode = GET_MODE_INNER (mode);
2375   if (imode == VOIDmode)
2376     imode = mode;
2377
2378   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2379
2380   /* Restrict the bits of "t" to the multiplication's mode.  */
2381   t &= GET_MODE_MASK (imode);
2382
2383   /* t == 1 can be done in zero cost.  */
2384   if (t == 1)
2385     {
2386       alg_out->ops = 1;
2387       alg_out->cost.cost = 0;
2388       alg_out->cost.latency = 0;
2389       alg_out->op[0] = alg_m;
2390       return;
2391     }
2392
2393   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2394      fail now.  */
2395   if (t == 0)
2396     {
2397       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2398         return;
2399       else
2400         {
2401           alg_out->ops = 1;
2402           alg_out->cost.cost = zero_cost (speed);
2403           alg_out->cost.latency = zero_cost (speed);
2404           alg_out->op[0] = alg_zero;
2405           return;
2406         }
2407     }
2408
2409   /* We'll be needing a couple extra algorithm structures now.  */
2410
2411   alg_in = XALLOCA (struct algorithm);
2412   best_alg = XALLOCA (struct algorithm);
2413   best_cost = *cost_limit;
2414
2415   /* Compute the hash index.  */
2416   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2417
2418   /* See if we already know what to do for T.  */
2419   entry_ptr = alg_hash_entry_ptr (hash_index);
2420   if (entry_ptr->t == t
2421       && entry_ptr->mode == mode
2422       && entry_ptr->mode == mode
2423       && entry_ptr->speed == speed
2424       && entry_ptr->alg != alg_unknown)
2425     {
2426       cache_alg = entry_ptr->alg;
2427
2428       if (cache_alg == alg_impossible)
2429         {
2430           /* The cache tells us that it's impossible to synthesize
2431              multiplication by T within entry_ptr->cost.  */
2432           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2433             /* COST_LIMIT is at least as restrictive as the one
2434                recorded in the hash table, in which case we have no
2435                hope of synthesizing a multiplication.  Just
2436                return.  */
2437             return;
2438
2439           /* If we get here, COST_LIMIT is less restrictive than the
2440              one recorded in the hash table, so we may be able to
2441              synthesize a multiplication.  Proceed as if we didn't
2442              have the cache entry.  */
2443         }
2444       else
2445         {
2446           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2447             /* The cached algorithm shows that this multiplication
2448                requires more cost than COST_LIMIT.  Just return.  This
2449                way, we don't clobber this cache entry with
2450                alg_impossible but retain useful information.  */
2451             return;
2452
2453           cache_hit = true;
2454
2455           switch (cache_alg)
2456             {
2457             case alg_shift:
2458               goto do_alg_shift;
2459
2460             case alg_add_t_m2:
2461             case alg_sub_t_m2:
2462               goto do_alg_addsub_t_m2;
2463
2464             case alg_add_factor:
2465             case alg_sub_factor:
2466               goto do_alg_addsub_factor;
2467
2468             case alg_add_t2_m:
2469               goto do_alg_add_t2_m;
2470
2471             case alg_sub_t2_m:
2472               goto do_alg_sub_t2_m;
2473
2474             default:
2475               gcc_unreachable ();
2476             }
2477         }
2478     }
2479
2480   /* If we have a group of zero bits at the low-order part of T, try
2481      multiplying by the remaining bits and then doing a shift.  */
2482
2483   if ((t & 1) == 0)
2484     {
2485     do_alg_shift:
2486       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2487       if (m < maxm)
2488         {
2489           q = t >> m;
2490           /* The function expand_shift will choose between a shift and
2491              a sequence of additions, so the observed cost is given as
2492              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2493           op_cost = m * add_cost (speed, mode);
2494           if (shift_cost (speed, mode, m) < op_cost)
2495             op_cost = shift_cost (speed, mode, m);
2496           new_limit.cost = best_cost.cost - op_cost;
2497           new_limit.latency = best_cost.latency - op_cost;
2498           synth_mult (alg_in, q, &new_limit, mode);
2499
2500           alg_in->cost.cost += op_cost;
2501           alg_in->cost.latency += op_cost;
2502           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2503             {
2504               struct algorithm *x;
2505               best_cost = alg_in->cost;
2506               x = alg_in, alg_in = best_alg, best_alg = x;
2507               best_alg->log[best_alg->ops] = m;
2508               best_alg->op[best_alg->ops] = alg_shift;
2509             }
2510
2511           /* See if treating ORIG_T as a signed number yields a better
2512              sequence.  Try this sequence only for a negative ORIG_T
2513              as it would be useless for a non-negative ORIG_T.  */
2514           if ((HOST_WIDE_INT) orig_t < 0)
2515             {
2516               /* Shift ORIG_T as follows because a right shift of a
2517                  negative-valued signed type is implementation
2518                  defined.  */
2519               q = ~(~orig_t >> m);
2520               /* The function expand_shift will choose between a shift
2521                  and a sequence of additions, so the observed cost is
2522                  given as MIN (m * add_cost(speed, mode),
2523                  shift_cost(speed, mode, m)).  */
2524               op_cost = m * add_cost (speed, mode);
2525               if (shift_cost (speed, mode, m) < op_cost)
2526                 op_cost = shift_cost (speed, mode, m);
2527               new_limit.cost = best_cost.cost - op_cost;
2528               new_limit.latency = best_cost.latency - op_cost;
2529               synth_mult (alg_in, q, &new_limit, mode);
2530
2531               alg_in->cost.cost += op_cost;
2532               alg_in->cost.latency += op_cost;
2533               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2534                 {
2535                   struct algorithm *x;
2536                   best_cost = alg_in->cost;
2537                   x = alg_in, alg_in = best_alg, best_alg = x;
2538                   best_alg->log[best_alg->ops] = m;
2539                   best_alg->op[best_alg->ops] = alg_shift;
2540                 }
2541             }
2542         }
2543       if (cache_hit)
2544         goto done;
2545     }
2546
2547   /* If we have an odd number, add or subtract one.  */
2548   if ((t & 1) != 0)
2549     {
2550       unsigned HOST_WIDE_INT w;
2551
2552     do_alg_addsub_t_m2:
2553       for (w = 1; (w & t) != 0; w <<= 1)
2554         ;
2555       /* If T was -1, then W will be zero after the loop.  This is another
2556          case where T ends with ...111.  Handling this with (T + 1) and
2557          subtract 1 produces slightly better code and results in algorithm
2558          selection much faster than treating it like the ...0111 case
2559          below.  */
2560       if (w == 0
2561           || (w > 2
2562               /* Reject the case where t is 3.
2563                  Thus we prefer addition in that case.  */
2564               && t != 3))
2565         {
2566           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2567
2568           op_cost = add_cost (speed, mode);
2569           new_limit.cost = best_cost.cost - op_cost;
2570           new_limit.latency = best_cost.latency - op_cost;
2571           synth_mult (alg_in, t + 1, &new_limit, mode);
2572
2573           alg_in->cost.cost += op_cost;
2574           alg_in->cost.latency += op_cost;
2575           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2576             {
2577               struct algorithm *x;
2578               best_cost = alg_in->cost;
2579               x = alg_in, alg_in = best_alg, best_alg = x;
2580               best_alg->log[best_alg->ops] = 0;
2581               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2582             }
2583         }
2584       else
2585         {
2586           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2587
2588           op_cost = add_cost (speed, mode);
2589           new_limit.cost = best_cost.cost - op_cost;
2590           new_limit.latency = best_cost.latency - op_cost;
2591           synth_mult (alg_in, t - 1, &new_limit, mode);
2592
2593           alg_in->cost.cost += op_cost;
2594           alg_in->cost.latency += op_cost;
2595           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2596             {
2597               struct algorithm *x;
2598               best_cost = alg_in->cost;
2599               x = alg_in, alg_in = best_alg, best_alg = x;
2600               best_alg->log[best_alg->ops] = 0;
2601               best_alg->op[best_alg->ops] = alg_add_t_m2;
2602             }
2603         }
2604
2605       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2606          quickly with a - a * n for some appropriate constant n.  */
2607       m = exact_log2 (-orig_t + 1);
2608       if (m >= 0 && m < maxm)
2609         {
2610           op_cost = shiftsub1_cost (speed, mode, m);
2611           new_limit.cost = best_cost.cost - op_cost;
2612           new_limit.latency = best_cost.latency - op_cost;
2613           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2614                       &new_limit, mode);
2615
2616           alg_in->cost.cost += op_cost;
2617           alg_in->cost.latency += op_cost;
2618           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2619             {
2620               struct algorithm *x;
2621               best_cost = alg_in->cost;
2622               x = alg_in, alg_in = best_alg, best_alg = x;
2623               best_alg->log[best_alg->ops] = m;
2624               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2625             }
2626         }
2627
2628       if (cache_hit)
2629         goto done;
2630     }
2631
2632   /* Look for factors of t of the form
2633      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2634      If we find such a factor, we can multiply by t using an algorithm that
2635      multiplies by q, shift the result by m and add/subtract it to itself.
2636
2637      We search for large factors first and loop down, even if large factors
2638      are less probable than small; if we find a large factor we will find a
2639      good sequence quickly, and therefore be able to prune (by decreasing
2640      COST_LIMIT) the search.  */
2641
2642  do_alg_addsub_factor:
2643   for (m = floor_log2 (t - 1); m >= 2; m--)
2644     {
2645       unsigned HOST_WIDE_INT d;
2646
2647       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2648       if (t % d == 0 && t > d && m < maxm
2649           && (!cache_hit || cache_alg == alg_add_factor))
2650         {
2651           /* If the target has a cheap shift-and-add instruction use
2652              that in preference to a shift insn followed by an add insn.
2653              Assume that the shift-and-add is "atomic" with a latency
2654              equal to its cost, otherwise assume that on superscalar
2655              hardware the shift may be executed concurrently with the
2656              earlier steps in the algorithm.  */
2657           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2658           if (shiftadd_cost (speed, mode, m) < op_cost)
2659             {
2660               op_cost = shiftadd_cost (speed, mode, m);
2661               op_latency = op_cost;
2662             }
2663           else
2664             op_latency = add_cost (speed, mode);
2665
2666           new_limit.cost = best_cost.cost - op_cost;
2667           new_limit.latency = best_cost.latency - op_latency;
2668           synth_mult (alg_in, t / d, &new_limit, mode);
2669
2670           alg_in->cost.cost += op_cost;
2671           alg_in->cost.latency += op_latency;
2672           if (alg_in->cost.latency < op_cost)
2673             alg_in->cost.latency = op_cost;
2674           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2675             {
2676               struct algorithm *x;
2677               best_cost = alg_in->cost;
2678               x = alg_in, alg_in = best_alg, best_alg = x;
2679               best_alg->log[best_alg->ops] = m;
2680               best_alg->op[best_alg->ops] = alg_add_factor;
2681             }
2682           /* Other factors will have been taken care of in the recursion.  */
2683           break;
2684         }
2685
2686       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2687       if (t % d == 0 && t > d && m < maxm
2688           && (!cache_hit || cache_alg == alg_sub_factor))
2689         {
2690           /* If the target has a cheap shift-and-subtract insn use
2691              that in preference to a shift insn followed by a sub insn.
2692              Assume that the shift-and-sub is "atomic" with a latency
2693              equal to it's cost, otherwise assume that on superscalar
2694              hardware the shift may be executed concurrently with the
2695              earlier steps in the algorithm.  */
2696           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2697           if (shiftsub0_cost (speed, mode, m) < op_cost)
2698             {
2699               op_cost = shiftsub0_cost (speed, mode, m);
2700               op_latency = op_cost;
2701             }
2702           else
2703             op_latency = add_cost (speed, mode);
2704
2705           new_limit.cost = best_cost.cost - op_cost;
2706           new_limit.latency = best_cost.latency - op_latency;
2707           synth_mult (alg_in, t / d, &new_limit, mode);
2708
2709           alg_in->cost.cost += op_cost;
2710           alg_in->cost.latency += op_latency;
2711           if (alg_in->cost.latency < op_cost)
2712             alg_in->cost.latency = op_cost;
2713           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2714             {
2715               struct algorithm *x;
2716               best_cost = alg_in->cost;
2717               x = alg_in, alg_in = best_alg, best_alg = x;
2718               best_alg->log[best_alg->ops] = m;
2719               best_alg->op[best_alg->ops] = alg_sub_factor;
2720             }
2721           break;
2722         }
2723     }
2724   if (cache_hit)
2725     goto done;
2726
2727   /* Try shift-and-add (load effective address) instructions,
2728      i.e. do a*3, a*5, a*9.  */
2729   if ((t & 1) != 0)
2730     {
2731     do_alg_add_t2_m:
2732       q = t - 1;
2733       q = q & -q;
2734       m = exact_log2 (q);
2735       if (m >= 0 && m < maxm)
2736         {
2737           op_cost = shiftadd_cost (speed, mode, m);
2738           new_limit.cost = best_cost.cost - op_cost;
2739           new_limit.latency = best_cost.latency - op_cost;
2740           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2741
2742           alg_in->cost.cost += op_cost;
2743           alg_in->cost.latency += op_cost;
2744           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2745             {
2746               struct algorithm *x;
2747               best_cost = alg_in->cost;
2748               x = alg_in, alg_in = best_alg, best_alg = x;
2749               best_alg->log[best_alg->ops] = m;
2750               best_alg->op[best_alg->ops] = alg_add_t2_m;
2751             }
2752         }
2753       if (cache_hit)
2754         goto done;
2755
2756     do_alg_sub_t2_m:
2757       q = t + 1;
2758       q = q & -q;
2759       m = exact_log2 (q);
2760       if (m >= 0 && m < maxm)
2761         {
2762           op_cost = shiftsub0_cost (speed, mode, m);
2763           new_limit.cost = best_cost.cost - op_cost;
2764           new_limit.latency = best_cost.latency - op_cost;
2765           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2766
2767           alg_in->cost.cost += op_cost;
2768           alg_in->cost.latency += op_cost;
2769           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2770             {
2771               struct algorithm *x;
2772               best_cost = alg_in->cost;
2773               x = alg_in, alg_in = best_alg, best_alg = x;
2774               best_alg->log[best_alg->ops] = m;
2775               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2776             }
2777         }
2778       if (cache_hit)
2779         goto done;
2780     }
2781
2782  done:
2783   /* If best_cost has not decreased, we have not found any algorithm.  */
2784   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2785     {
2786       /* We failed to find an algorithm.  Record alg_impossible for
2787          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2788          we are asked to find an algorithm for T within the same or
2789          lower COST_LIMIT, we can immediately return to the
2790          caller.  */
2791       entry_ptr->t = t;
2792       entry_ptr->mode = mode;
2793       entry_ptr->speed = speed;
2794       entry_ptr->alg = alg_impossible;
2795       entry_ptr->cost = *cost_limit;
2796       return;
2797     }
2798
2799   /* Cache the result.  */
2800   if (!cache_hit)
2801     {
2802       entry_ptr->t = t;
2803       entry_ptr->mode = mode;
2804       entry_ptr->speed = speed;
2805       entry_ptr->alg = best_alg->op[best_alg->ops];
2806       entry_ptr->cost.cost = best_cost.cost;
2807       entry_ptr->cost.latency = best_cost.latency;
2808     }
2809
2810   /* If we are getting a too long sequence for `struct algorithm'
2811      to record, make this search fail.  */
2812   if (best_alg->ops == MAX_BITS_PER_WORD)
2813     return;
2814
2815   /* Copy the algorithm from temporary space to the space at alg_out.
2816      We avoid using structure assignment because the majority of
2817      best_alg is normally undefined, and this is a critical function.  */
2818   alg_out->ops = best_alg->ops + 1;
2819   alg_out->cost = best_cost;
2820   memcpy (alg_out->op, best_alg->op,
2821           alg_out->ops * sizeof *alg_out->op);
2822   memcpy (alg_out->log, best_alg->log,
2823           alg_out->ops * sizeof *alg_out->log);
2824 }
2825 \f
2826 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2827    Try three variations:
2828
2829        - a shift/add sequence based on VAL itself
2830        - a shift/add sequence based on -VAL, followed by a negation
2831        - a shift/add sequence based on VAL - 1, followed by an addition.
2832
2833    Return true if the cheapest of these cost less than MULT_COST,
2834    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2835
2836 static bool
2837 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2838                      struct algorithm *alg, enum mult_variant *variant,
2839                      int mult_cost)
2840 {
2841   struct algorithm alg2;
2842   struct mult_cost limit;
2843   int op_cost;
2844   bool speed = optimize_insn_for_speed_p ();
2845
2846   /* Fail quickly for impossible bounds.  */
2847   if (mult_cost < 0)
2848     return false;
2849
2850   /* Ensure that mult_cost provides a reasonable upper bound.
2851      Any constant multiplication can be performed with less
2852      than 2 * bits additions.  */
2853   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2854   if (mult_cost > op_cost)
2855     mult_cost = op_cost;
2856
2857   *variant = basic_variant;
2858   limit.cost = mult_cost;
2859   limit.latency = mult_cost;
2860   synth_mult (alg, val, &limit, mode);
2861
2862   /* This works only if the inverted value actually fits in an
2863      `unsigned int' */
2864   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2865     {
2866       op_cost = neg_cost (speed, mode);
2867       if (MULT_COST_LESS (&alg->cost, mult_cost))
2868         {
2869           limit.cost = alg->cost.cost - op_cost;
2870           limit.latency = alg->cost.latency - op_cost;
2871         }
2872       else
2873         {
2874           limit.cost = mult_cost - op_cost;
2875           limit.latency = mult_cost - op_cost;
2876         }
2877
2878       synth_mult (&alg2, -val, &limit, mode);
2879       alg2.cost.cost += op_cost;
2880       alg2.cost.latency += op_cost;
2881       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2882         *alg = alg2, *variant = negate_variant;
2883     }
2884
2885   /* This proves very useful for division-by-constant.  */
2886   op_cost = add_cost (speed, mode);
2887   if (MULT_COST_LESS (&alg->cost, mult_cost))
2888     {
2889       limit.cost = alg->cost.cost - op_cost;
2890       limit.latency = alg->cost.latency - op_cost;
2891     }
2892   else
2893     {
2894       limit.cost = mult_cost - op_cost;
2895       limit.latency = mult_cost - op_cost;
2896     }
2897
2898   synth_mult (&alg2, val - 1, &limit, mode);
2899   alg2.cost.cost += op_cost;
2900   alg2.cost.latency += op_cost;
2901   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2902     *alg = alg2, *variant = add_variant;
2903
2904   return MULT_COST_LESS (&alg->cost, mult_cost);
2905 }
2906
2907 /* A subroutine of expand_mult, used for constant multiplications.
2908    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2909    convenient.  Use the shift/add sequence described by ALG and apply
2910    the final fixup specified by VARIANT.  */
2911
2912 static rtx
2913 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
2914                    rtx target, const struct algorithm *alg,
2915                    enum mult_variant variant)
2916 {
2917   HOST_WIDE_INT val_so_far;
2918   rtx_insn *insn;
2919   rtx accum, tem;
2920   int opno;
2921   machine_mode nmode;
2922
2923   /* Avoid referencing memory over and over and invalid sharing
2924      on SUBREGs.  */
2925   op0 = force_reg (mode, op0);
2926
2927   /* ACCUM starts out either as OP0 or as a zero, depending on
2928      the first operation.  */
2929
2930   if (alg->op[0] == alg_zero)
2931     {
2932       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2933       val_so_far = 0;
2934     }
2935   else if (alg->op[0] == alg_m)
2936     {
2937       accum = copy_to_mode_reg (mode, op0);
2938       val_so_far = 1;
2939     }
2940   else
2941     gcc_unreachable ();
2942
2943   for (opno = 1; opno < alg->ops; opno++)
2944     {
2945       int log = alg->log[opno];
2946       rtx shift_subtarget = optimize ? 0 : accum;
2947       rtx add_target
2948         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2949            && !optimize)
2950           ? target : 0;
2951       rtx accum_target = optimize ? 0 : accum;
2952       rtx accum_inner;
2953
2954       switch (alg->op[opno])
2955         {
2956         case alg_shift:
2957           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2958           /* REG_EQUAL note will be attached to the following insn.  */
2959           emit_move_insn (accum, tem);
2960           val_so_far <<= log;
2961           break;
2962
2963         case alg_add_t_m2:
2964           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2965           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2966                                  add_target ? add_target : accum_target);
2967           val_so_far += (HOST_WIDE_INT) 1 << log;
2968           break;
2969
2970         case alg_sub_t_m2:
2971           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2972           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2973                                  add_target ? add_target : accum_target);
2974           val_so_far -= (HOST_WIDE_INT) 1 << log;
2975           break;
2976
2977         case alg_add_t2_m:
2978           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2979                                 log, shift_subtarget, 0);
2980           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2981                                  add_target ? add_target : accum_target);
2982           val_so_far = (val_so_far << log) + 1;
2983           break;
2984
2985         case alg_sub_t2_m:
2986           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2987                                 log, shift_subtarget, 0);
2988           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2989                                  add_target ? add_target : accum_target);
2990           val_so_far = (val_so_far << log) - 1;
2991           break;
2992
2993         case alg_add_factor:
2994           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2995           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2996                                  add_target ? add_target : accum_target);
2997           val_so_far += val_so_far << log;
2998           break;
2999
3000         case alg_sub_factor:
3001           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3002           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3003                                  (add_target
3004                                   ? add_target : (optimize ? 0 : tem)));
3005           val_so_far = (val_so_far << log) - val_so_far;
3006           break;
3007
3008         default:
3009           gcc_unreachable ();
3010         }
3011
3012       if (SCALAR_INT_MODE_P (mode))
3013         {
3014           /* Write a REG_EQUAL note on the last insn so that we can cse
3015              multiplication sequences.  Note that if ACCUM is a SUBREG,
3016              we've set the inner register and must properly indicate that.  */
3017           tem = op0, nmode = mode;
3018           accum_inner = accum;
3019           if (GET_CODE (accum) == SUBREG)
3020             {
3021               accum_inner = SUBREG_REG (accum);
3022               nmode = GET_MODE (accum_inner);
3023               tem = gen_lowpart (nmode, op0);
3024             }
3025
3026           insn = get_last_insn ();
3027           set_dst_reg_note (insn, REG_EQUAL,
3028                             gen_rtx_MULT (nmode, tem,
3029                                           gen_int_mode (val_so_far, nmode)),
3030                             accum_inner);
3031         }
3032     }
3033
3034   if (variant == negate_variant)
3035     {
3036       val_so_far = -val_so_far;
3037       accum = expand_unop (mode, neg_optab, accum, target, 0);
3038     }
3039   else if (variant == add_variant)
3040     {
3041       val_so_far = val_so_far + 1;
3042       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3043     }
3044
3045   /* Compare only the bits of val and val_so_far that are significant
3046      in the result mode, to avoid sign-/zero-extension confusion.  */
3047   nmode = GET_MODE_INNER (mode);
3048   if (nmode == VOIDmode)
3049     nmode = mode;
3050   val &= GET_MODE_MASK (nmode);
3051   val_so_far &= GET_MODE_MASK (nmode);
3052   gcc_assert (val == val_so_far);
3053
3054   return accum;
3055 }
3056
3057 /* Perform a multiplication and return an rtx for the result.
3058    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3059    TARGET is a suggestion for where to store the result (an rtx).
3060
3061    We check specially for a constant integer as OP1.
3062    If you want this check for OP0 as well, then before calling
3063    you should swap the two operands if OP0 would be constant.  */
3064
3065 rtx
3066 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3067              int unsignedp)
3068 {
3069   enum mult_variant variant;
3070   struct algorithm algorithm;
3071   rtx scalar_op1;
3072   int max_cost;
3073   bool speed = optimize_insn_for_speed_p ();
3074   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3075
3076   if (CONSTANT_P (op0))
3077     {
3078       rtx temp = op0;
3079       op0 = op1;
3080       op1 = temp;
3081     }
3082
3083   /* For vectors, there are several simplifications that can be made if
3084      all elements of the vector constant are identical.  */
3085   scalar_op1 = op1;
3086   if (GET_CODE (op1) == CONST_VECTOR)
3087     {
3088       int i, n = CONST_VECTOR_NUNITS (op1);
3089       scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3090       for (i = 1; i < n; ++i)
3091         if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3092           goto skip_scalar;
3093     }
3094
3095   if (INTEGRAL_MODE_P (mode))
3096     {
3097       rtx fake_reg;
3098       HOST_WIDE_INT coeff;
3099       bool is_neg;
3100       int mode_bitsize;
3101
3102       if (op1 == CONST0_RTX (mode))
3103         return op1;
3104       if (op1 == CONST1_RTX (mode))
3105         return op0;
3106       if (op1 == CONSTM1_RTX (mode))
3107         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3108                             op0, target, 0);
3109
3110       if (do_trapv)
3111         goto skip_synth;
3112
3113       /* If mode is integer vector mode, check if the backend supports
3114          vector lshift (by scalar or vector) at all.  If not, we can't use
3115          synthetized multiply.  */
3116       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3117           && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3118           && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3119         goto skip_synth;
3120
3121       /* These are the operations that are potentially turned into
3122          a sequence of shifts and additions.  */
3123       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3124
3125       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3126          less than or equal in size to `unsigned int' this doesn't matter.
3127          If the mode is larger than `unsigned int', then synth_mult works
3128          only if the constant value exactly fits in an `unsigned int' without
3129          any truncation.  This means that multiplying by negative values does
3130          not work; results are off by 2^32 on a 32 bit machine.  */
3131       if (CONST_INT_P (scalar_op1))
3132         {
3133           coeff = INTVAL (scalar_op1);
3134           is_neg = coeff < 0;
3135         }
3136 #if TARGET_SUPPORTS_WIDE_INT
3137       else if (CONST_WIDE_INT_P (scalar_op1))
3138 #else
3139       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3140 #endif
3141         {
3142           int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3143           /* Perfect power of 2 (other than 1, which is handled above).  */
3144           if (shift > 0)
3145             return expand_shift (LSHIFT_EXPR, mode, op0,
3146                                  shift, target, unsignedp);
3147           else
3148             goto skip_synth;
3149         }
3150       else
3151         goto skip_synth;
3152
3153       /* We used to test optimize here, on the grounds that it's better to
3154          produce a smaller program when -O is not used.  But this causes
3155          such a terrible slowdown sometimes that it seems better to always
3156          use synth_mult.  */
3157
3158       /* Special case powers of two.  */
3159       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3160           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3161         return expand_shift (LSHIFT_EXPR, mode, op0,
3162                              floor_log2 (coeff), target, unsignedp);
3163
3164       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3165
3166       /* Attempt to handle multiplication of DImode values by negative
3167          coefficients, by performing the multiplication by a positive
3168          multiplier and then inverting the result.  */
3169       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3170         {
3171           /* Its safe to use -coeff even for INT_MIN, as the
3172              result is interpreted as an unsigned coefficient.
3173              Exclude cost of op0 from max_cost to match the cost
3174              calculation of the synth_mult.  */
3175           coeff = -(unsigned HOST_WIDE_INT) coeff;
3176           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3177                       - neg_cost (speed, mode));
3178           if (max_cost <= 0)
3179             goto skip_synth;
3180
3181           /* Special case powers of two.  */
3182           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3183             {
3184               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3185                                        floor_log2 (coeff), target, unsignedp);
3186               return expand_unop (mode, neg_optab, temp, target, 0);
3187             }
3188
3189           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3190                                    max_cost))
3191             {
3192               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3193                                             &algorithm, variant);
3194               return expand_unop (mode, neg_optab, temp, target, 0);
3195             }
3196           goto skip_synth;
3197         }
3198
3199       /* Exclude cost of op0 from max_cost to match the cost
3200          calculation of the synth_mult.  */
3201       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3202       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3203         return expand_mult_const (mode, op0, coeff, target,
3204                                   &algorithm, variant);
3205     }
3206  skip_synth:
3207
3208   /* Expand x*2.0 as x+x.  */
3209   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3210     {
3211       REAL_VALUE_TYPE d;
3212       REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3213
3214       if (REAL_VALUES_EQUAL (d, dconst2))
3215         {
3216           op0 = force_reg (GET_MODE (op0), op0);
3217           return expand_binop (mode, add_optab, op0, op0,
3218                                target, unsignedp, OPTAB_LIB_WIDEN);
3219         }
3220     }
3221  skip_scalar:
3222
3223   /* This used to use umul_optab if unsigned, but for non-widening multiply
3224      there is no difference between signed and unsigned.  */
3225   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3226                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3227   gcc_assert (op0);
3228   return op0;
3229 }
3230
3231 /* Return a cost estimate for multiplying a register by the given
3232    COEFFicient in the given MODE and SPEED.  */
3233
3234 int
3235 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3236 {
3237   int max_cost;
3238   struct algorithm algorithm;
3239   enum mult_variant variant;
3240
3241   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3242   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3243   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3244     return algorithm.cost.cost;
3245   else
3246     return max_cost;
3247 }
3248
3249 /* Perform a widening multiplication and return an rtx for the result.
3250    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3251    TARGET is a suggestion for where to store the result (an rtx).
3252    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3253    or smul_widen_optab.
3254
3255    We check specially for a constant integer as OP1, comparing the
3256    cost of a widening multiply against the cost of a sequence of shifts
3257    and adds.  */
3258
3259 rtx
3260 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3261                       int unsignedp, optab this_optab)
3262 {
3263   bool speed = optimize_insn_for_speed_p ();
3264   rtx cop1;
3265
3266   if (CONST_INT_P (op1)
3267       && GET_MODE (op0) != VOIDmode
3268       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3269                                 this_optab == umul_widen_optab))
3270       && CONST_INT_P (cop1)
3271       && (INTVAL (cop1) >= 0
3272           || HWI_COMPUTABLE_MODE_P (mode)))
3273     {
3274       HOST_WIDE_INT coeff = INTVAL (cop1);
3275       int max_cost;
3276       enum mult_variant variant;
3277       struct algorithm algorithm;
3278
3279       /* Special case powers of two.  */
3280       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3281         {
3282           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3283           return expand_shift (LSHIFT_EXPR, mode, op0,
3284                                floor_log2 (coeff), target, unsignedp);
3285         }
3286
3287       /* Exclude cost of op0 from max_cost to match the cost
3288          calculation of the synth_mult.  */
3289       max_cost = mul_widen_cost (speed, mode);
3290       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3291                                max_cost))
3292         {
3293           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3294           return expand_mult_const (mode, op0, coeff, target,
3295                                     &algorithm, variant);
3296         }
3297     }
3298   return expand_binop (mode, this_optab, op0, op1, target,
3299                        unsignedp, OPTAB_LIB_WIDEN);
3300 }
3301 \f
3302 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3303    replace division by D, and put the least significant N bits of the result
3304    in *MULTIPLIER_PTR and return the most significant bit.
3305
3306    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3307    needed precision is in PRECISION (should be <= N).
3308
3309    PRECISION should be as small as possible so this function can choose
3310    multiplier more freely.
3311
3312    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3313    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3314
3315    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3316    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3317
3318 unsigned HOST_WIDE_INT
3319 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3320                    unsigned HOST_WIDE_INT *multiplier_ptr,
3321                    int *post_shift_ptr, int *lgup_ptr)
3322 {
3323   int lgup, post_shift;
3324   int pow, pow2;
3325
3326   /* lgup = ceil(log2(divisor)); */
3327   lgup = ceil_log2 (d);
3328
3329   gcc_assert (lgup <= n);
3330
3331   pow = n + lgup;
3332   pow2 = n + lgup - precision;
3333
3334   /* mlow = 2^(N + lgup)/d */
3335   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3336   wide_int mlow = wi::udiv_trunc (val, d);
3337
3338   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3339   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3340   wide_int mhigh = wi::udiv_trunc (val, d);
3341
3342   /* If precision == N, then mlow, mhigh exceed 2^N
3343      (but they do not exceed 2^(N+1)).  */
3344
3345   /* Reduce to lowest terms.  */
3346   for (post_shift = lgup; post_shift > 0; post_shift--)
3347     {
3348       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3349                                                        HOST_BITS_PER_WIDE_INT);
3350       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3351                                                        HOST_BITS_PER_WIDE_INT);
3352       if (ml_lo >= mh_lo)
3353         break;
3354
3355       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3356       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3357     }
3358
3359   *post_shift_ptr = post_shift;
3360   *lgup_ptr = lgup;
3361   if (n < HOST_BITS_PER_WIDE_INT)
3362     {
3363       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3364       *multiplier_ptr = mhigh.to_uhwi () & mask;
3365       return mhigh.to_uhwi () >= mask;
3366     }
3367   else
3368     {
3369       *multiplier_ptr = mhigh.to_uhwi ();
3370       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3371     }
3372 }
3373
3374 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3375    congruent to 1 (mod 2**N).  */
3376
3377 static unsigned HOST_WIDE_INT
3378 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3379 {
3380   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3381
3382   /* The algorithm notes that the choice y = x satisfies
3383      x*y == 1 mod 2^3, since x is assumed odd.
3384      Each iteration doubles the number of bits of significance in y.  */
3385
3386   unsigned HOST_WIDE_INT mask;
3387   unsigned HOST_WIDE_INT y = x;
3388   int nbit = 3;
3389
3390   mask = (n == HOST_BITS_PER_WIDE_INT
3391           ? ~(unsigned HOST_WIDE_INT) 0
3392           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3393
3394   while (nbit < n)
3395     {
3396       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3397       nbit *= 2;
3398     }
3399   return y;
3400 }
3401
3402 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3403    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3404    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3405    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3406    become signed.
3407
3408    The result is put in TARGET if that is convenient.
3409
3410    MODE is the mode of operation.  */
3411
3412 rtx
3413 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3414                              rtx op1, rtx target, int unsignedp)
3415 {
3416   rtx tem;
3417   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3418
3419   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3420                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3421   tem = expand_and (mode, tem, op1, NULL_RTX);
3422   adj_operand
3423     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3424                      adj_operand);
3425
3426   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3427                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3428   tem = expand_and (mode, tem, op0, NULL_RTX);
3429   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3430                           target);
3431
3432   return target;
3433 }
3434
3435 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3436
3437 static rtx
3438 extract_high_half (machine_mode mode, rtx op)
3439 {
3440   machine_mode wider_mode;
3441
3442   if (mode == word_mode)
3443     return gen_highpart (mode, op);
3444
3445   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3446
3447   wider_mode = GET_MODE_WIDER_MODE (mode);
3448   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3449                      GET_MODE_BITSIZE (mode), 0, 1);
3450   return convert_modes (mode, wider_mode, op, 0);
3451 }
3452
3453 /* Like expmed_mult_highpart, but only consider using a multiplication
3454    optab.  OP1 is an rtx for the constant operand.  */
3455
3456 static rtx
3457 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3458                             rtx target, int unsignedp, int max_cost)
3459 {
3460   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3461   machine_mode wider_mode;
3462   optab moptab;
3463   rtx tem;
3464   int size;
3465   bool speed = optimize_insn_for_speed_p ();
3466
3467   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3468
3469   wider_mode = GET_MODE_WIDER_MODE (mode);
3470   size = GET_MODE_BITSIZE (mode);
3471
3472   /* Firstly, try using a multiplication insn that only generates the needed
3473      high part of the product, and in the sign flavor of unsignedp.  */
3474   if (mul_highpart_cost (speed, mode) < max_cost)
3475     {
3476       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3477       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3478                           unsignedp, OPTAB_DIRECT);
3479       if (tem)
3480         return tem;
3481     }
3482
3483   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3484      Need to adjust the result after the multiplication.  */
3485   if (size - 1 < BITS_PER_WORD
3486       && (mul_highpart_cost (speed, mode)
3487           + 2 * shift_cost (speed, mode, size-1)
3488           + 4 * add_cost (speed, mode) < max_cost))
3489     {
3490       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3491       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3492                           unsignedp, OPTAB_DIRECT);
3493       if (tem)
3494         /* We used the wrong signedness.  Adjust the result.  */
3495         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3496                                             tem, unsignedp);
3497     }
3498
3499   /* Try widening multiplication.  */
3500   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3501   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3502       && mul_widen_cost (speed, wider_mode) < max_cost)
3503     {
3504       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3505                           unsignedp, OPTAB_WIDEN);
3506       if (tem)
3507         return extract_high_half (mode, tem);
3508     }
3509
3510   /* Try widening the mode and perform a non-widening multiplication.  */
3511   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3512       && size - 1 < BITS_PER_WORD
3513       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3514           < max_cost))
3515     {
3516       rtx_insn *insns;
3517       rtx wop0, wop1;
3518
3519       /* We need to widen the operands, for example to ensure the
3520          constant multiplier is correctly sign or zero extended.
3521          Use a sequence to clean-up any instructions emitted by
3522          the conversions if things don't work out.  */
3523       start_sequence ();
3524       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3525       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3526       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3527                           unsignedp, OPTAB_WIDEN);
3528       insns = get_insns ();
3529       end_sequence ();
3530
3531       if (tem)
3532         {
3533           emit_insn (insns);
3534           return extract_high_half (mode, tem);
3535         }
3536     }
3537
3538   /* Try widening multiplication of opposite signedness, and adjust.  */
3539   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3540   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3541       && size - 1 < BITS_PER_WORD
3542       && (mul_widen_cost (speed, wider_mode)
3543           + 2 * shift_cost (speed, mode, size-1)
3544           + 4 * add_cost (speed, mode) < max_cost))
3545     {
3546       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3547                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3548       if (tem != 0)
3549         {
3550           tem = extract_high_half (mode, tem);
3551           /* We used the wrong signedness.  Adjust the result.  */
3552           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3553                                               target, unsignedp);
3554         }
3555     }
3556
3557   return 0;
3558 }
3559
3560 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3561    putting the high half of the result in TARGET if that is convenient,
3562    and return where the result is.  If the operation can not be performed,
3563    0 is returned.
3564
3565    MODE is the mode of operation and result.
3566
3567    UNSIGNEDP nonzero means unsigned multiply.
3568
3569    MAX_COST is the total allowed cost for the expanded RTL.  */
3570
3571 static rtx
3572 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3573                       rtx target, int unsignedp, int max_cost)
3574 {
3575   machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3576   unsigned HOST_WIDE_INT cnst1;
3577   int extra_cost;
3578   bool sign_adjust = false;
3579   enum mult_variant variant;
3580   struct algorithm alg;
3581   rtx tem;
3582   bool speed = optimize_insn_for_speed_p ();
3583
3584   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3585   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3586   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3587
3588   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3589
3590   /* We can't optimize modes wider than BITS_PER_WORD.
3591      ??? We might be able to perform double-word arithmetic if
3592      mode == word_mode, however all the cost calculations in
3593      synth_mult etc. assume single-word operations.  */
3594   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3595     return expmed_mult_highpart_optab (mode, op0, op1, target,
3596                                        unsignedp, max_cost);
3597
3598   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3599
3600   /* Check whether we try to multiply by a negative constant.  */
3601   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3602     {
3603       sign_adjust = true;
3604       extra_cost += add_cost (speed, mode);
3605     }
3606
3607   /* See whether shift/add multiplication is cheap enough.  */
3608   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3609                            max_cost - extra_cost))
3610     {
3611       /* See whether the specialized multiplication optabs are
3612          cheaper than the shift/add version.  */
3613       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3614                                         alg.cost.cost + extra_cost);
3615       if (tem)
3616         return tem;
3617
3618       tem = convert_to_mode (wider_mode, op0, unsignedp);
3619       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3620       tem = extract_high_half (mode, tem);
3621
3622       /* Adjust result for signedness.  */
3623       if (sign_adjust)
3624         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3625
3626       return tem;
3627     }
3628   return expmed_mult_highpart_optab (mode, op0, op1, target,
3629                                      unsignedp, max_cost);
3630 }
3631
3632
3633 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3634
3635 static rtx
3636 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3637 {
3638   rtx result, temp, shift;
3639   rtx_code_label *label;
3640   int logd;
3641   int prec = GET_MODE_PRECISION (mode);
3642
3643   logd = floor_log2 (d);
3644   result = gen_reg_rtx (mode);
3645
3646   /* Avoid conditional branches when they're expensive.  */
3647   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3648       && optimize_insn_for_speed_p ())
3649     {
3650       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3651                                       mode, 0, -1);
3652       if (signmask)
3653         {
3654           HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3655           signmask = force_reg (mode, signmask);
3656           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3657
3658           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3659              which instruction sequence to use.  If logical right shifts
3660              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3661              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3662
3663           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3664           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3665               || (set_src_cost (temp, optimize_insn_for_speed_p ())
3666                   > COSTS_N_INSNS (2)))
3667             {
3668               temp = expand_binop (mode, xor_optab, op0, signmask,
3669                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3670               temp = expand_binop (mode, sub_optab, temp, signmask,
3671                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3672               temp = expand_binop (mode, and_optab, temp,
3673                                    gen_int_mode (masklow, mode),
3674                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3675               temp = expand_binop (mode, xor_optab, temp, signmask,
3676                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3677               temp = expand_binop (mode, sub_optab, temp, signmask,
3678                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3679             }
3680           else
3681             {
3682               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3683                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3684               signmask = force_reg (mode, signmask);
3685
3686               temp = expand_binop (mode, add_optab, op0, signmask,
3687                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3688               temp = expand_binop (mode, and_optab, temp,
3689                                    gen_int_mode (masklow, mode),
3690                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3691               temp = expand_binop (mode, sub_optab, temp, signmask,
3692                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3693             }
3694           return temp;
3695         }
3696     }
3697
3698   /* Mask contains the mode's signbit and the significant bits of the
3699      modulus.  By including the signbit in the operation, many targets
3700      can avoid an explicit compare operation in the following comparison
3701      against zero.  */
3702   wide_int mask = wi::mask (logd, false, prec);
3703   mask = wi::set_bit (mask, prec - 1);
3704
3705   temp = expand_binop (mode, and_optab, op0,
3706                        immed_wide_int_const (mask, mode),
3707                        result, 1, OPTAB_LIB_WIDEN);
3708   if (temp != result)
3709     emit_move_insn (result, temp);
3710
3711   label = gen_label_rtx ();
3712   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3713
3714   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3715                        0, OPTAB_LIB_WIDEN);
3716
3717   mask = wi::mask (logd, true, prec);
3718   temp = expand_binop (mode, ior_optab, temp,
3719                        immed_wide_int_const (mask, mode),
3720                        result, 1, OPTAB_LIB_WIDEN);
3721   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3722                        0, OPTAB_LIB_WIDEN);
3723   if (temp != result)
3724     emit_move_insn (result, temp);
3725   emit_label (label);
3726   return result;
3727 }
3728
3729 /* Expand signed division of OP0 by a power of two D in mode MODE.
3730    This routine is only called for positive values of D.  */
3731
3732 static rtx
3733 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3734 {
3735   rtx temp;
3736   rtx_code_label *label;
3737   int logd;
3738
3739   logd = floor_log2 (d);
3740
3741   if (d == 2
3742       && BRANCH_COST (optimize_insn_for_speed_p (),
3743                       false) >= 1)
3744     {
3745       temp = gen_reg_rtx (mode);
3746       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3747       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3748                            0, OPTAB_LIB_WIDEN);
3749       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3750     }
3751
3752 #ifdef HAVE_conditional_move
3753   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3754       >= 2)
3755     {
3756       rtx temp2;
3757
3758       start_sequence ();
3759       temp2 = copy_to_mode_reg (mode, op0);
3760       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3761                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3762       temp = force_reg (mode, temp);
3763
3764       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3765       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3766                                      mode, temp, temp2, mode, 0);
3767       if (temp2)
3768         {
3769           rtx_insn *seq = get_insns ();
3770           end_sequence ();
3771           emit_insn (seq);
3772           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3773         }
3774       end_sequence ();
3775     }
3776 #endif
3777
3778   if (BRANCH_COST (optimize_insn_for_speed_p (),
3779                    false) >= 2)
3780     {
3781       int ushift = GET_MODE_BITSIZE (mode) - logd;
3782
3783       temp = gen_reg_rtx (mode);
3784       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3785       if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3786           || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3787              > COSTS_N_INSNS (1))
3788         temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3789                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3790       else
3791         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3792                              ushift, NULL_RTX, 1);
3793       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3794                            0, OPTAB_LIB_WIDEN);
3795       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3796     }
3797
3798   label = gen_label_rtx ();
3799   temp = copy_to_mode_reg (mode, op0);
3800   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3801   expand_inc (temp, gen_int_mode (d - 1, mode));
3802   emit_label (label);
3803   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3804 }
3805 \f
3806 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3807    if that is convenient, and returning where the result is.
3808    You may request either the quotient or the remainder as the result;
3809    specify REM_FLAG nonzero to get the remainder.
3810
3811    CODE is the expression code for which kind of division this is;
3812    it controls how rounding is done.  MODE is the machine mode to use.
3813    UNSIGNEDP nonzero means do unsigned division.  */
3814
3815 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3816    and then correct it by or'ing in missing high bits
3817    if result of ANDI is nonzero.
3818    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3819    This could optimize to a bfexts instruction.
3820    But C doesn't use these operations, so their optimizations are
3821    left for later.  */
3822 /* ??? For modulo, we don't actually need the highpart of the first product,
3823    the low part will do nicely.  And for small divisors, the second multiply
3824    can also be a low-part only multiply or even be completely left out.
3825    E.g. to calculate the remainder of a division by 3 with a 32 bit
3826    multiply, multiply with 0x55555556 and extract the upper two bits;
3827    the result is exact for inputs up to 0x1fffffff.
3828    The input range can be reduced by using cross-sum rules.
3829    For odd divisors >= 3, the following table gives right shift counts
3830    so that if a number is shifted by an integer multiple of the given
3831    amount, the remainder stays the same:
3832    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3833    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3834    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3835    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3836    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3837
3838    Cross-sum rules for even numbers can be derived by leaving as many bits
3839    to the right alone as the divisor has zeros to the right.
3840    E.g. if x is an unsigned 32 bit number:
3841    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3842    */
3843
3844 rtx
3845 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3846                rtx op0, rtx op1, rtx target, int unsignedp)
3847 {
3848   machine_mode compute_mode;
3849   rtx tquotient;
3850   rtx quotient = 0, remainder = 0;
3851   rtx_insn *last;
3852   int size;
3853   rtx_insn *insn;
3854   optab optab1, optab2;
3855   int op1_is_constant, op1_is_pow2 = 0;
3856   int max_cost, extra_cost;
3857   static HOST_WIDE_INT last_div_const = 0;
3858   bool speed = optimize_insn_for_speed_p ();
3859
3860   op1_is_constant = CONST_INT_P (op1);
3861   if (op1_is_constant)
3862     {
3863       unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3864       if (unsignedp)
3865         ext_op1 &= GET_MODE_MASK (mode);
3866       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3867                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3868     }
3869
3870   /*
3871      This is the structure of expand_divmod:
3872
3873      First comes code to fix up the operands so we can perform the operations
3874      correctly and efficiently.
3875
3876      Second comes a switch statement with code specific for each rounding mode.
3877      For some special operands this code emits all RTL for the desired
3878      operation, for other cases, it generates only a quotient and stores it in
3879      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3880      to indicate that it has not done anything.
3881
3882      Last comes code that finishes the operation.  If QUOTIENT is set and
3883      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3884      QUOTIENT is not set, it is computed using trunc rounding.
3885
3886      We try to generate special code for division and remainder when OP1 is a
3887      constant.  If |OP1| = 2**n we can use shifts and some other fast
3888      operations.  For other values of OP1, we compute a carefully selected
3889      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3890      by m.
3891
3892      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3893      half of the product.  Different strategies for generating the product are
3894      implemented in expmed_mult_highpart.
3895
3896      If what we actually want is the remainder, we generate that by another
3897      by-constant multiplication and a subtraction.  */
3898
3899   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3900      code below will malfunction if we are, so check here and handle
3901      the special case if so.  */
3902   if (op1 == const1_rtx)
3903     return rem_flag ? const0_rtx : op0;
3904
3905     /* When dividing by -1, we could get an overflow.
3906      negv_optab can handle overflows.  */
3907   if (! unsignedp && op1 == constm1_rtx)
3908     {
3909       if (rem_flag)
3910         return const0_rtx;
3911       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3912                           ? negv_optab : neg_optab, op0, target, 0);
3913     }
3914
3915   if (target
3916       /* Don't use the function value register as a target
3917          since we have to read it as well as write it,
3918          and function-inlining gets confused by this.  */
3919       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3920           /* Don't clobber an operand while doing a multi-step calculation.  */
3921           || ((rem_flag || op1_is_constant)
3922               && (reg_mentioned_p (target, op0)
3923                   || (MEM_P (op0) && MEM_P (target))))
3924           || reg_mentioned_p (target, op1)
3925           || (MEM_P (op1) && MEM_P (target))))
3926     target = 0;
3927
3928   /* Get the mode in which to perform this computation.  Normally it will
3929      be MODE, but sometimes we can't do the desired operation in MODE.
3930      If so, pick a wider mode in which we can do the operation.  Convert
3931      to that mode at the start to avoid repeated conversions.
3932
3933      First see what operations we need.  These depend on the expression
3934      we are evaluating.  (We assume that divxx3 insns exist under the
3935      same conditions that modxx3 insns and that these insns don't normally
3936      fail.  If these assumptions are not correct, we may generate less
3937      efficient code in some cases.)
3938
3939      Then see if we find a mode in which we can open-code that operation
3940      (either a division, modulus, or shift).  Finally, check for the smallest
3941      mode for which we can do the operation with a library call.  */
3942
3943   /* We might want to refine this now that we have division-by-constant
3944      optimization.  Since expmed_mult_highpart tries so many variants, it is
3945      not straightforward to generalize this.  Maybe we should make an array
3946      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3947
3948   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3949             ? (unsignedp ? lshr_optab : ashr_optab)
3950             : (unsignedp ? udiv_optab : sdiv_optab));
3951   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3952             ? optab1
3953             : (unsignedp ? udivmod_optab : sdivmod_optab));
3954
3955   for (compute_mode = mode; compute_mode != VOIDmode;
3956        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3957     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3958         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3959       break;
3960
3961   if (compute_mode == VOIDmode)
3962     for (compute_mode = mode; compute_mode != VOIDmode;
3963          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3964       if (optab_libfunc (optab1, compute_mode)
3965           || optab_libfunc (optab2, compute_mode))
3966         break;
3967
3968   /* If we still couldn't find a mode, use MODE, but expand_binop will
3969      probably die.  */
3970   if (compute_mode == VOIDmode)
3971     compute_mode = mode;
3972
3973   if (target && GET_MODE (target) == compute_mode)
3974     tquotient = target;
3975   else
3976     tquotient = gen_reg_rtx (compute_mode);
3977
3978   size = GET_MODE_BITSIZE (compute_mode);
3979 #if 0
3980   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3981      (mode), and thereby get better code when OP1 is a constant.  Do that
3982      later.  It will require going over all usages of SIZE below.  */
3983   size = GET_MODE_BITSIZE (mode);
3984 #endif
3985
3986   /* Only deduct something for a REM if the last divide done was
3987      for a different constant.   Then set the constant of the last
3988      divide.  */
3989   max_cost = (unsignedp
3990               ? udiv_cost (speed, compute_mode)
3991               : sdiv_cost (speed, compute_mode));
3992   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3993                      && INTVAL (op1) == last_div_const))
3994     max_cost -= (mul_cost (speed, compute_mode)
3995                  + add_cost (speed, compute_mode));
3996
3997   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3998
3999   /* Now convert to the best mode to use.  */
4000   if (compute_mode != mode)
4001     {
4002       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4003       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4004
4005       /* convert_modes may have placed op1 into a register, so we
4006          must recompute the following.  */
4007       op1_is_constant = CONST_INT_P (op1);
4008       op1_is_pow2 = (op1_is_constant
4009                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4010                           || (! unsignedp
4011                               && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4012     }
4013
4014   /* If one of the operands is a volatile MEM, copy it into a register.  */
4015
4016   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4017     op0 = force_reg (compute_mode, op0);
4018   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4019     op1 = force_reg (compute_mode, op1);
4020
4021   /* If we need the remainder or if OP1 is constant, we need to
4022      put OP0 in a register in case it has any queued subexpressions.  */
4023   if (rem_flag || op1_is_constant)
4024     op0 = force_reg (compute_mode, op0);
4025
4026   last = get_last_insn ();
4027
4028   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4029   if (unsignedp)
4030     {
4031       if (code == FLOOR_DIV_EXPR)
4032         code = TRUNC_DIV_EXPR;
4033       if (code == FLOOR_MOD_EXPR)
4034         code = TRUNC_MOD_EXPR;
4035       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4036         code = TRUNC_DIV_EXPR;
4037     }
4038
4039   if (op1 != const0_rtx)
4040     switch (code)
4041       {
4042       case TRUNC_MOD_EXPR:
4043       case TRUNC_DIV_EXPR:
4044         if (op1_is_constant)
4045           {
4046             if (unsignedp)
4047               {
4048                 unsigned HOST_WIDE_INT mh, ml;
4049                 int pre_shift, post_shift;
4050                 int dummy;
4051                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4052                                             & GET_MODE_MASK (compute_mode));
4053
4054                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4055                   {
4056                     pre_shift = floor_log2 (d);
4057                     if (rem_flag)
4058                       {
4059                         unsigned HOST_WIDE_INT mask
4060                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4061                         remainder
4062                           = expand_binop (compute_mode, and_optab, op0,
4063                                           gen_int_mode (mask, compute_mode),
4064                                           remainder, 1,
4065                                           OPTAB_LIB_WIDEN);
4066                         if (remainder)
4067                           return gen_lowpart (mode, remainder);
4068                       }
4069                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4070                                              pre_shift, tquotient, 1);
4071                   }
4072                 else if (size <= HOST_BITS_PER_WIDE_INT)
4073                   {
4074                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4075                       {
4076                         /* Most significant bit of divisor is set; emit an scc
4077                            insn.  */
4078                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4079                                                           compute_mode, 1, 1);
4080                       }
4081                     else
4082                       {
4083                         /* Find a suitable multiplier and right shift count
4084                            instead of multiplying with D.  */
4085
4086                         mh = choose_multiplier (d, size, size,
4087                                                 &ml, &post_shift, &dummy);
4088
4089                         /* If the suggested multiplier is more than SIZE bits,
4090                            we can do better for even divisors, using an
4091                            initial right shift.  */
4092                         if (mh != 0 && (d & 1) == 0)
4093                           {
4094                             pre_shift = floor_log2 (d & -d);
4095                             mh = choose_multiplier (d >> pre_shift, size,
4096                                                     size - pre_shift,
4097                                                     &ml, &post_shift, &dummy);
4098                             gcc_assert (!mh);
4099                           }
4100                         else
4101                           pre_shift = 0;
4102
4103                         if (mh != 0)
4104                           {
4105                             rtx t1, t2, t3, t4;
4106
4107                             if (post_shift - 1 >= BITS_PER_WORD)
4108                               goto fail1;
4109
4110                             extra_cost
4111                               = (shift_cost (speed, compute_mode, post_shift - 1)
4112                                  + shift_cost (speed, compute_mode, 1)
4113                                  + 2 * add_cost (speed, compute_mode));
4114                             t1 = expmed_mult_highpart
4115                               (compute_mode, op0,
4116                                gen_int_mode (ml, compute_mode),
4117                                NULL_RTX, 1, max_cost - extra_cost);
4118                             if (t1 == 0)
4119                               goto fail1;
4120                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4121                                                                op0, t1),
4122                                                 NULL_RTX);
4123                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4124                                                t2, 1, NULL_RTX, 1);
4125                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4126                                                               t1, t3),
4127                                                 NULL_RTX);
4128                             quotient = expand_shift
4129                               (RSHIFT_EXPR, compute_mode, t4,
4130                                post_shift - 1, tquotient, 1);
4131                           }
4132                         else
4133                           {
4134                             rtx t1, t2;
4135
4136                             if (pre_shift >= BITS_PER_WORD
4137                                 || post_shift >= BITS_PER_WORD)
4138                               goto fail1;
4139
4140                             t1 = expand_shift
4141                               (RSHIFT_EXPR, compute_mode, op0,
4142                                pre_shift, NULL_RTX, 1);
4143                             extra_cost
4144                               = (shift_cost (speed, compute_mode, pre_shift)
4145                                  + shift_cost (speed, compute_mode, post_shift));
4146                             t2 = expmed_mult_highpart
4147                               (compute_mode, t1,
4148                                gen_int_mode (ml, compute_mode),
4149                                NULL_RTX, 1, max_cost - extra_cost);
4150                             if (t2 == 0)
4151                               goto fail1;
4152                             quotient = expand_shift
4153                               (RSHIFT_EXPR, compute_mode, t2,
4154                                post_shift, tquotient, 1);
4155                           }
4156                       }
4157                   }
4158                 else            /* Too wide mode to use tricky code */
4159                   break;
4160
4161                 insn = get_last_insn ();
4162                 if (insn != last)
4163                   set_dst_reg_note (insn, REG_EQUAL,
4164                                     gen_rtx_UDIV (compute_mode, op0, op1),
4165                                     quotient);
4166               }
4167             else                /* TRUNC_DIV, signed */
4168               {
4169                 unsigned HOST_WIDE_INT ml;
4170                 int lgup, post_shift;
4171                 rtx mlr;
4172                 HOST_WIDE_INT d = INTVAL (op1);
4173                 unsigned HOST_WIDE_INT abs_d;
4174
4175                 /* Since d might be INT_MIN, we have to cast to
4176                    unsigned HOST_WIDE_INT before negating to avoid
4177                    undefined signed overflow.  */
4178                 abs_d = (d >= 0
4179                          ? (unsigned HOST_WIDE_INT) d
4180                          : - (unsigned HOST_WIDE_INT) d);
4181
4182                 /* n rem d = n rem -d */
4183                 if (rem_flag && d < 0)
4184                   {
4185                     d = abs_d;
4186                     op1 = gen_int_mode (abs_d, compute_mode);
4187                   }
4188
4189                 if (d == 1)
4190                   quotient = op0;
4191                 else if (d == -1)
4192                   quotient = expand_unop (compute_mode, neg_optab, op0,
4193                                           tquotient, 0);
4194                 else if (HOST_BITS_PER_WIDE_INT >= size
4195                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4196                   {
4197                     /* This case is not handled correctly below.  */
4198                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4199                                                 compute_mode, 1, 1);
4200                     if (quotient == 0)
4201                       goto fail1;
4202                   }
4203                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4204                          && (rem_flag
4205                              ? smod_pow2_cheap (speed, compute_mode)
4206                              : sdiv_pow2_cheap (speed, compute_mode))
4207                          /* We assume that cheap metric is true if the
4208                             optab has an expander for this mode.  */
4209                          && ((optab_handler ((rem_flag ? smod_optab
4210                                               : sdiv_optab),
4211                                              compute_mode)
4212                               != CODE_FOR_nothing)
4213                              || (optab_handler (sdivmod_optab,
4214                                                 compute_mode)
4215                                  != CODE_FOR_nothing)))
4216                   ;
4217                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4218                   {
4219                     if (rem_flag)
4220                       {
4221                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4222                         if (remainder)
4223                           return gen_lowpart (mode, remainder);
4224                       }
4225
4226                     if (sdiv_pow2_cheap (speed, compute_mode)
4227                         && ((optab_handler (sdiv_optab, compute_mode)
4228                              != CODE_FOR_nothing)
4229                             || (optab_handler (sdivmod_optab, compute_mode)
4230                                 != CODE_FOR_nothing)))
4231                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4232                                                 compute_mode, op0,
4233                                                 gen_int_mode (abs_d,
4234                                                               compute_mode),
4235                                                 NULL_RTX, 0);
4236                     else
4237                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4238
4239                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4240                        negate the quotient.  */
4241                     if (d < 0)
4242                       {
4243                         insn = get_last_insn ();
4244                         if (insn != last
4245                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4246                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4247                           set_dst_reg_note (insn, REG_EQUAL,
4248                                             gen_rtx_DIV (compute_mode, op0,
4249                                                          gen_int_mode
4250                                                            (abs_d,
4251                                                             compute_mode)),
4252                                             quotient);
4253
4254                         quotient = expand_unop (compute_mode, neg_optab,
4255                                                 quotient, quotient, 0);
4256                       }
4257                   }
4258                 else if (size <= HOST_BITS_PER_WIDE_INT)
4259                   {
4260                     choose_multiplier (abs_d, size, size - 1,
4261                                        &ml, &post_shift, &lgup);
4262                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4263                       {
4264                         rtx t1, t2, t3;
4265
4266                         if (post_shift >= BITS_PER_WORD
4267                             || size - 1 >= BITS_PER_WORD)
4268                           goto fail1;
4269
4270                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4271                                       + shift_cost (speed, compute_mode, size - 1)
4272                                       + add_cost (speed, compute_mode));
4273                         t1 = expmed_mult_highpart
4274                           (compute_mode, op0, gen_int_mode (ml, compute_mode),
4275                            NULL_RTX, 0, max_cost - extra_cost);
4276                         if (t1 == 0)
4277                           goto fail1;
4278                         t2 = expand_shift
4279                           (RSHIFT_EXPR, compute_mode, t1,
4280                            post_shift, NULL_RTX, 0);
4281                         t3 = expand_shift
4282                           (RSHIFT_EXPR, compute_mode, op0,
4283                            size - 1, NULL_RTX, 0);
4284                         if (d < 0)
4285                           quotient
4286                             = force_operand (gen_rtx_MINUS (compute_mode,
4287                                                             t3, t2),
4288                                              tquotient);
4289                         else
4290                           quotient
4291                             = force_operand (gen_rtx_MINUS (compute_mode,
4292                                                             t2, t3),
4293                                              tquotient);
4294                       }
4295                     else
4296                       {
4297                         rtx t1, t2, t3, t4;
4298
4299                         if (post_shift >= BITS_PER_WORD
4300                             || size - 1 >= BITS_PER_WORD)
4301                           goto fail1;
4302
4303                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4304                         mlr = gen_int_mode (ml, compute_mode);
4305                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4306                                       + shift_cost (speed, compute_mode, size - 1)
4307                                       + 2 * add_cost (speed, compute_mode));
4308                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4309                                                    NULL_RTX, 0,
4310                                                    max_cost - extra_cost);
4311                         if (t1 == 0)
4312                           goto fail1;
4313                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4314                                                           t1, op0),
4315                                             NULL_RTX);
4316                         t3 = expand_shift
4317                           (RSHIFT_EXPR, compute_mode, t2,
4318                            post_shift, NULL_RTX, 0);
4319                         t4 = expand_shift
4320                           (RSHIFT_EXPR, compute_mode, op0,
4321                            size - 1, NULL_RTX, 0);
4322                         if (d < 0)
4323                           quotient
4324                             = force_operand (gen_rtx_MINUS (compute_mode,
4325                                                             t4, t3),
4326                                              tquotient);
4327                         else
4328                           quotient
4329                             = force_operand (gen_rtx_MINUS (compute_mode,
4330                                                             t3, t4),
4331                                              tquotient);
4332                       }
4333                   }
4334                 else            /* Too wide mode to use tricky code */
4335                   break;
4336
4337                 insn = get_last_insn ();
4338                 if (insn != last)
4339                   set_dst_reg_note (insn, REG_EQUAL,
4340                                     gen_rtx_DIV (compute_mode, op0, op1),
4341                                     quotient);
4342               }
4343             break;
4344           }
4345       fail1:
4346         delete_insns_since (last);
4347         break;
4348
4349       case FLOOR_DIV_EXPR:
4350       case FLOOR_MOD_EXPR:
4351       /* We will come here only for signed operations.  */
4352         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4353           {
4354             unsigned HOST_WIDE_INT mh, ml;
4355             int pre_shift, lgup, post_shift;
4356             HOST_WIDE_INT d = INTVAL (op1);
4357
4358             if (d > 0)
4359               {
4360                 /* We could just as easily deal with negative constants here,
4361                    but it does not seem worth the trouble for GCC 2.6.  */
4362                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4363                   {
4364                     pre_shift = floor_log2 (d);
4365                     if (rem_flag)
4366                       {
4367                         unsigned HOST_WIDE_INT mask
4368                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4369                         remainder = expand_binop
4370                           (compute_mode, and_optab, op0,
4371                            gen_int_mode (mask, compute_mode),
4372                            remainder, 0, OPTAB_LIB_WIDEN);
4373                         if (remainder)
4374                           return gen_lowpart (mode, remainder);
4375                       }
4376                     quotient = expand_shift
4377                       (RSHIFT_EXPR, compute_mode, op0,
4378                        pre_shift, tquotient, 0);
4379                   }
4380                 else
4381                   {
4382                     rtx t1, t2, t3, t4;
4383
4384                     mh = choose_multiplier (d, size, size - 1,
4385                                             &ml, &post_shift, &lgup);
4386                     gcc_assert (!mh);
4387
4388                     if (post_shift < BITS_PER_WORD
4389                         && size - 1 < BITS_PER_WORD)
4390                       {
4391                         t1 = expand_shift
4392                           (RSHIFT_EXPR, compute_mode, op0,
4393                            size - 1, NULL_RTX, 0);
4394                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4395                                            NULL_RTX, 0, OPTAB_WIDEN);
4396                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4397                                       + shift_cost (speed, compute_mode, size - 1)
4398                                       + 2 * add_cost (speed, compute_mode));
4399                         t3 = expmed_mult_highpart
4400                           (compute_mode, t2, gen_int_mode (ml, compute_mode),
4401                            NULL_RTX, 1, max_cost - extra_cost);
4402                         if (t3 != 0)
4403                           {
4404                             t4 = expand_shift
4405                               (RSHIFT_EXPR, compute_mode, t3,
4406                                post_shift, NULL_RTX, 1);
4407                             quotient = expand_binop (compute_mode, xor_optab,
4408                                                      t4, t1, tquotient, 0,
4409                                                      OPTAB_WIDEN);
4410                           }
4411                       }
4412                   }
4413               }
4414             else
4415               {
4416                 rtx nsign, t1, t2, t3, t4;
4417                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4418                                                   op0, constm1_rtx), NULL_RTX);
4419                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4420                                    0, OPTAB_WIDEN);
4421                 nsign = expand_shift
4422                   (RSHIFT_EXPR, compute_mode, t2,
4423                    size - 1, NULL_RTX, 0);
4424                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4425                                     NULL_RTX);
4426                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4427                                     NULL_RTX, 0);
4428                 if (t4)
4429                   {
4430                     rtx t5;
4431                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4432                                       NULL_RTX, 0);
4433                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4434                                                             t4, t5),
4435                                               tquotient);
4436                   }
4437               }
4438           }
4439
4440         if (quotient != 0)
4441           break;
4442         delete_insns_since (last);
4443
4444         /* Try using an instruction that produces both the quotient and
4445            remainder, using truncation.  We can easily compensate the quotient
4446            or remainder to get floor rounding, once we have the remainder.
4447            Notice that we compute also the final remainder value here,
4448            and return the result right away.  */
4449         if (target == 0 || GET_MODE (target) != compute_mode)
4450           target = gen_reg_rtx (compute_mode);
4451
4452         if (rem_flag)
4453           {
4454             remainder
4455               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4456             quotient = gen_reg_rtx (compute_mode);
4457           }
4458         else
4459           {
4460             quotient
4461               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4462             remainder = gen_reg_rtx (compute_mode);
4463           }
4464
4465         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4466                                  quotient, remainder, 0))
4467           {
4468             /* This could be computed with a branch-less sequence.
4469                Save that for later.  */
4470             rtx tem;
4471             rtx_code_label *label = gen_label_rtx ();
4472             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4473             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4474                                 NULL_RTX, 0, OPTAB_WIDEN);
4475             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4476             expand_dec (quotient, const1_rtx);
4477             expand_inc (remainder, op1);
4478             emit_label (label);
4479             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4480           }
4481
4482         /* No luck with division elimination or divmod.  Have to do it
4483            by conditionally adjusting op0 *and* the result.  */
4484         {
4485           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4486           rtx adjusted_op0;
4487           rtx tem;
4488
4489           quotient = gen_reg_rtx (compute_mode);
4490           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4491           label1 = gen_label_rtx ();
4492           label2 = gen_label_rtx ();
4493           label3 = gen_label_rtx ();
4494           label4 = gen_label_rtx ();
4495           label5 = gen_label_rtx ();
4496           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4497           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4498           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4499                               quotient, 0, OPTAB_LIB_WIDEN);
4500           if (tem != quotient)
4501             emit_move_insn (quotient, tem);
4502           emit_jump_insn (gen_jump (label5));
4503           emit_barrier ();
4504           emit_label (label1);
4505           expand_inc (adjusted_op0, const1_rtx);
4506           emit_jump_insn (gen_jump (label4));
4507           emit_barrier ();
4508           emit_label (label2);
4509           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4510           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4511                               quotient, 0, OPTAB_LIB_WIDEN);
4512           if (tem != quotient)
4513             emit_move_insn (quotient, tem);
4514           emit_jump_insn (gen_jump (label5));
4515           emit_barrier ();
4516           emit_label (label3);
4517           expand_dec (adjusted_op0, const1_rtx);
4518           emit_label (label4);
4519           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4520                               quotient, 0, OPTAB_LIB_WIDEN);
4521           if (tem != quotient)
4522             emit_move_insn (quotient, tem);
4523           expand_dec (quotient, const1_rtx);
4524           emit_label (label5);
4525         }
4526         break;
4527
4528       case CEIL_DIV_EXPR:
4529       case CEIL_MOD_EXPR:
4530         if (unsignedp)
4531           {
4532             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4533               {
4534                 rtx t1, t2, t3;
4535                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4536                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4537                                    floor_log2 (d), tquotient, 1);
4538                 t2 = expand_binop (compute_mode, and_optab, op0,
4539                                    gen_int_mode (d - 1, compute_mode),
4540                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4541                 t3 = gen_reg_rtx (compute_mode);
4542                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4543                                       compute_mode, 1, 1);
4544                 if (t3 == 0)
4545                   {
4546                     rtx_code_label *lab;
4547                     lab = gen_label_rtx ();
4548                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4549                     expand_inc (t1, const1_rtx);
4550                     emit_label (lab);
4551                     quotient = t1;
4552                   }
4553                 else
4554                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4555                                                           t1, t3),
4556                                             tquotient);
4557                 break;
4558               }
4559
4560             /* Try using an instruction that produces both the quotient and
4561                remainder, using truncation.  We can easily compensate the
4562                quotient or remainder to get ceiling rounding, once we have the
4563                remainder.  Notice that we compute also the final remainder
4564                value here, and return the result right away.  */
4565             if (target == 0 || GET_MODE (target) != compute_mode)
4566               target = gen_reg_rtx (compute_mode);
4567
4568             if (rem_flag)
4569               {
4570                 remainder = (REG_P (target)
4571                              ? target : gen_reg_rtx (compute_mode));
4572                 quotient = gen_reg_rtx (compute_mode);
4573               }
4574             else
4575               {
4576                 quotient = (REG_P (target)
4577                             ? target : gen_reg_rtx (compute_mode));
4578                 remainder = gen_reg_rtx (compute_mode);
4579               }
4580
4581             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4582                                      remainder, 1))
4583               {
4584                 /* This could be computed with a branch-less sequence.
4585                    Save that for later.  */
4586                 rtx_code_label *label = gen_label_rtx ();
4587                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4588                                  compute_mode, label);
4589                 expand_inc (quotient, const1_rtx);
4590                 expand_dec (remainder, op1);
4591                 emit_label (label);
4592                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4593               }
4594
4595             /* No luck with division elimination or divmod.  Have to do it
4596                by conditionally adjusting op0 *and* the result.  */
4597             {
4598               rtx_code_label *label1, *label2;
4599               rtx adjusted_op0, tem;
4600
4601               quotient = gen_reg_rtx (compute_mode);
4602               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4603               label1 = gen_label_rtx ();
4604               label2 = gen_label_rtx ();
4605               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4606                                compute_mode, label1);
4607               emit_move_insn  (quotient, const0_rtx);
4608               emit_jump_insn (gen_jump (label2));
4609               emit_barrier ();
4610               emit_label (label1);
4611               expand_dec (adjusted_op0, const1_rtx);
4612               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4613                                   quotient, 1, OPTAB_LIB_WIDEN);
4614               if (tem != quotient)
4615                 emit_move_insn (quotient, tem);
4616               expand_inc (quotient, const1_rtx);
4617               emit_label (label2);
4618             }
4619           }
4620         else /* signed */
4621           {
4622             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4623                 && INTVAL (op1) >= 0)
4624               {
4625                 /* This is extremely similar to the code for the unsigned case
4626                    above.  For 2.7 we should merge these variants, but for
4627                    2.6.1 I don't want to touch the code for unsigned since that
4628                    get used in C.  The signed case will only be used by other
4629                    languages (Ada).  */
4630
4631                 rtx t1, t2, t3;
4632                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4633                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4634                                    floor_log2 (d), tquotient, 0);
4635                 t2 = expand_binop (compute_mode, and_optab, op0,
4636                                    gen_int_mode (d - 1, compute_mode),
4637                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4638                 t3 = gen_reg_rtx (compute_mode);
4639                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4640                                       compute_mode, 1, 1);
4641                 if (t3 == 0)
4642                   {
4643                     rtx_code_label *lab;
4644                     lab = gen_label_rtx ();
4645                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4646                     expand_inc (t1, const1_rtx);
4647                     emit_label (lab);
4648                     quotient = t1;
4649                   }
4650                 else
4651                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4652                                                           t1, t3),
4653                                             tquotient);
4654                 break;
4655               }
4656
4657             /* Try using an instruction that produces both the quotient and
4658                remainder, using truncation.  We can easily compensate the
4659                quotient or remainder to get ceiling rounding, once we have the
4660                remainder.  Notice that we compute also the final remainder
4661                value here, and return the result right away.  */
4662             if (target == 0 || GET_MODE (target) != compute_mode)
4663               target = gen_reg_rtx (compute_mode);
4664             if (rem_flag)
4665               {
4666                 remainder= (REG_P (target)
4667                             ? target : gen_reg_rtx (compute_mode));
4668                 quotient = gen_reg_rtx (compute_mode);
4669               }
4670             else
4671               {
4672                 quotient = (REG_P (target)
4673                             ? target : gen_reg_rtx (compute_mode));
4674                 remainder = gen_reg_rtx (compute_mode);
4675               }
4676
4677             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4678                                      remainder, 0))
4679               {
4680                 /* This could be computed with a branch-less sequence.
4681                    Save that for later.  */
4682                 rtx tem;
4683                 rtx_code_label *label = gen_label_rtx ();
4684                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4685                                  compute_mode, label);
4686                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4687                                     NULL_RTX, 0, OPTAB_WIDEN);
4688                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4689                 expand_inc (quotient, const1_rtx);
4690                 expand_dec (remainder, op1);
4691                 emit_label (label);
4692                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4693               }
4694
4695             /* No luck with division elimination or divmod.  Have to do it
4696                by conditionally adjusting op0 *and* the result.  */
4697             {
4698               rtx_code_label *label1, *label2, *label3, *label4, *label5;
4699               rtx adjusted_op0;
4700               rtx tem;
4701
4702               quotient = gen_reg_rtx (compute_mode);
4703               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4704               label1 = gen_label_rtx ();
4705               label2 = gen_label_rtx ();
4706               label3 = gen_label_rtx ();
4707               label4 = gen_label_rtx ();
4708               label5 = gen_label_rtx ();
4709               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4710               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4711                                compute_mode, label1);
4712               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4713                                   quotient, 0, OPTAB_LIB_WIDEN);
4714               if (tem != quotient)
4715                 emit_move_insn (quotient, tem);
4716               emit_jump_insn (gen_jump (label5));
4717               emit_barrier ();
4718               emit_label (label1);
4719               expand_dec (adjusted_op0, const1_rtx);
4720               emit_jump_insn (gen_jump (label4));
4721               emit_barrier ();
4722               emit_label (label2);
4723               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4724                                compute_mode, label3);
4725               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4726                                   quotient, 0, OPTAB_LIB_WIDEN);
4727               if (tem != quotient)
4728                 emit_move_insn (quotient, tem);
4729               emit_jump_insn (gen_jump (label5));
4730               emit_barrier ();
4731               emit_label (label3);
4732               expand_inc (adjusted_op0, const1_rtx);
4733               emit_label (label4);
4734               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4735                                   quotient, 0, OPTAB_LIB_WIDEN);
4736               if (tem != quotient)
4737                 emit_move_insn (quotient, tem);
4738               expand_inc (quotient, const1_rtx);
4739               emit_label (label5);
4740             }
4741           }
4742         break;
4743
4744       case EXACT_DIV_EXPR:
4745         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4746           {
4747             HOST_WIDE_INT d = INTVAL (op1);
4748             unsigned HOST_WIDE_INT ml;
4749             int pre_shift;
4750             rtx t1;
4751
4752             pre_shift = floor_log2 (d & -d);
4753             ml = invert_mod2n (d >> pre_shift, size);
4754             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4755                                pre_shift, NULL_RTX, unsignedp);
4756             quotient = expand_mult (compute_mode, t1,
4757                                     gen_int_mode (ml, compute_mode),
4758                                     NULL_RTX, 1);
4759
4760             insn = get_last_insn ();
4761             set_dst_reg_note (insn, REG_EQUAL,
4762                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4763                                               compute_mode, op0, op1),
4764                               quotient);
4765           }
4766         break;
4767
4768       case ROUND_DIV_EXPR:
4769       case ROUND_MOD_EXPR:
4770         if (unsignedp)
4771           {
4772             rtx tem;
4773             rtx_code_label *label;
4774             label = gen_label_rtx ();
4775             quotient = gen_reg_rtx (compute_mode);
4776             remainder = gen_reg_rtx (compute_mode);
4777             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4778               {
4779                 rtx tem;
4780                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4781                                          quotient, 1, OPTAB_LIB_WIDEN);
4782                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4783                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4784                                           remainder, 1, OPTAB_LIB_WIDEN);
4785               }
4786             tem = plus_constant (compute_mode, op1, -1);
4787             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4788             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4789             expand_inc (quotient, const1_rtx);
4790             expand_dec (remainder, op1);
4791             emit_label (label);
4792           }
4793         else
4794           {
4795             rtx abs_rem, abs_op1, tem, mask;
4796             rtx_code_label *label;
4797             label = gen_label_rtx ();
4798             quotient = gen_reg_rtx (compute_mode);
4799             remainder = gen_reg_rtx (compute_mode);
4800             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4801               {
4802                 rtx tem;
4803                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4804                                          quotient, 0, OPTAB_LIB_WIDEN);
4805                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4806                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4807                                           remainder, 0, OPTAB_LIB_WIDEN);
4808               }
4809             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4810             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4811             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4812                                 1, NULL_RTX, 1);
4813             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4814             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4815                                 NULL_RTX, 0, OPTAB_WIDEN);
4816             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4817                                  size - 1, NULL_RTX, 0);
4818             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4819                                 NULL_RTX, 0, OPTAB_WIDEN);
4820             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4821                                 NULL_RTX, 0, OPTAB_WIDEN);
4822             expand_inc (quotient, tem);
4823             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4824                                 NULL_RTX, 0, OPTAB_WIDEN);
4825             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4826                                 NULL_RTX, 0, OPTAB_WIDEN);
4827             expand_dec (remainder, tem);
4828             emit_label (label);
4829           }
4830         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4831
4832       default:
4833         gcc_unreachable ();
4834       }
4835
4836   if (quotient == 0)
4837     {
4838       if (target && GET_MODE (target) != compute_mode)
4839         target = 0;
4840
4841       if (rem_flag)
4842         {
4843           /* Try to produce the remainder without producing the quotient.
4844              If we seem to have a divmod pattern that does not require widening,
4845              don't try widening here.  We should really have a WIDEN argument
4846              to expand_twoval_binop, since what we'd really like to do here is
4847              1) try a mod insn in compute_mode
4848              2) try a divmod insn in compute_mode
4849              3) try a div insn in compute_mode and multiply-subtract to get
4850                 remainder
4851              4) try the same things with widening allowed.  */
4852           remainder
4853             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4854                                  op0, op1, target,
4855                                  unsignedp,
4856                                  ((optab_handler (optab2, compute_mode)
4857                                    != CODE_FOR_nothing)
4858                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4859           if (remainder == 0)
4860             {
4861               /* No luck there.  Can we do remainder and divide at once
4862                  without a library call?  */
4863               remainder = gen_reg_rtx (compute_mode);
4864               if (! expand_twoval_binop ((unsignedp
4865                                           ? udivmod_optab
4866                                           : sdivmod_optab),
4867                                          op0, op1,
4868                                          NULL_RTX, remainder, unsignedp))
4869                 remainder = 0;
4870             }
4871
4872           if (remainder)
4873             return gen_lowpart (mode, remainder);
4874         }
4875
4876       /* Produce the quotient.  Try a quotient insn, but not a library call.
4877          If we have a divmod in this mode, use it in preference to widening
4878          the div (for this test we assume it will not fail). Note that optab2
4879          is set to the one of the two optabs that the call below will use.  */
4880       quotient
4881         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4882                              op0, op1, rem_flag ? NULL_RTX : target,
4883                              unsignedp,
4884                              ((optab_handler (optab2, compute_mode)
4885                                != CODE_FOR_nothing)
4886                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4887
4888       if (quotient == 0)
4889         {
4890           /* No luck there.  Try a quotient-and-remainder insn,
4891              keeping the quotient alone.  */
4892           quotient = gen_reg_rtx (compute_mode);
4893           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4894                                      op0, op1,
4895                                      quotient, NULL_RTX, unsignedp))
4896             {
4897               quotient = 0;
4898               if (! rem_flag)
4899                 /* Still no luck.  If we are not computing the remainder,
4900                    use a library call for the quotient.  */
4901                 quotient = sign_expand_binop (compute_mode,
4902                                               udiv_optab, sdiv_optab,
4903                                               op0, op1, target,
4904                                               unsignedp, OPTAB_LIB_WIDEN);
4905             }
4906         }
4907     }
4908
4909   if (rem_flag)
4910     {
4911       if (target && GET_MODE (target) != compute_mode)
4912         target = 0;
4913
4914       if (quotient == 0)
4915         {
4916           /* No divide instruction either.  Use library for remainder.  */
4917           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4918                                          op0, op1, target,
4919                                          unsignedp, OPTAB_LIB_WIDEN);
4920           /* No remainder function.  Try a quotient-and-remainder
4921              function, keeping the remainder.  */
4922           if (!remainder)
4923             {
4924               remainder = gen_reg_rtx (compute_mode);
4925               if (!expand_twoval_binop_libfunc
4926                   (unsignedp ? udivmod_optab : sdivmod_optab,
4927                    op0, op1,
4928                    NULL_RTX, remainder,
4929                    unsignedp ? UMOD : MOD))
4930                 remainder = NULL_RTX;
4931             }
4932         }
4933       else
4934         {
4935           /* We divided.  Now finish doing X - Y * (X / Y).  */
4936           remainder = expand_mult (compute_mode, quotient, op1,
4937                                    NULL_RTX, unsignedp);
4938           remainder = expand_binop (compute_mode, sub_optab, op0,
4939                                     remainder, target, unsignedp,
4940                                     OPTAB_LIB_WIDEN);
4941         }
4942     }
4943
4944   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4945 }
4946 \f
4947 /* Return a tree node with data type TYPE, describing the value of X.
4948    Usually this is an VAR_DECL, if there is no obvious better choice.
4949    X may be an expression, however we only support those expressions
4950    generated by loop.c.  */
4951
4952 tree
4953 make_tree (tree type, rtx x)
4954 {
4955   tree t;
4956
4957   switch (GET_CODE (x))
4958     {
4959     case CONST_INT:
4960     case CONST_WIDE_INT:
4961       t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
4962       return t;
4963
4964     case CONST_DOUBLE:
4965       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
4966       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
4967         t = wide_int_to_tree (type,
4968                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
4969                                                     HOST_BITS_PER_WIDE_INT * 2));
4970       else
4971         {
4972           REAL_VALUE_TYPE d;
4973
4974           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4975           t = build_real (type, d);
4976         }
4977
4978       return t;
4979
4980     case CONST_VECTOR:
4981       {
4982         int units = CONST_VECTOR_NUNITS (x);
4983         tree itype = TREE_TYPE (type);
4984         tree *elts;
4985         int i;
4986
4987         /* Build a tree with vector elements.  */
4988         elts = XALLOCAVEC (tree, units);
4989         for (i = units - 1; i >= 0; --i)
4990           {
4991             rtx elt = CONST_VECTOR_ELT (x, i);
4992             elts[i] = make_tree (itype, elt);
4993           }
4994
4995         return build_vector (type, elts);
4996       }
4997
4998     case PLUS:
4999       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5000                           make_tree (type, XEXP (x, 1)));
5001
5002     case MINUS:
5003       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5004                           make_tree (type, XEXP (x, 1)));
5005
5006     case NEG:
5007       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5008
5009     case MULT:
5010       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5011                           make_tree (type, XEXP (x, 1)));
5012
5013     case ASHIFT:
5014       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5015                           make_tree (type, XEXP (x, 1)));
5016
5017     case LSHIFTRT:
5018       t = unsigned_type_for (type);
5019       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5020                                          make_tree (t, XEXP (x, 0)),
5021                                          make_tree (type, XEXP (x, 1))));
5022
5023     case ASHIFTRT:
5024       t = signed_type_for (type);
5025       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5026                                          make_tree (t, XEXP (x, 0)),
5027                                          make_tree (type, XEXP (x, 1))));
5028
5029     case DIV:
5030       if (TREE_CODE (type) != REAL_TYPE)
5031         t = signed_type_for (type);
5032       else
5033         t = type;
5034
5035       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5036                                          make_tree (t, XEXP (x, 0)),
5037                                          make_tree (t, XEXP (x, 1))));
5038     case UDIV:
5039       t = unsigned_type_for (type);
5040       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5041                                          make_tree (t, XEXP (x, 0)),
5042                                          make_tree (t, XEXP (x, 1))));
5043
5044     case SIGN_EXTEND:
5045     case ZERO_EXTEND:
5046       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5047                                           GET_CODE (x) == ZERO_EXTEND);
5048       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5049
5050     case CONST:
5051       return make_tree (type, XEXP (x, 0));
5052
5053     case SYMBOL_REF:
5054       t = SYMBOL_REF_DECL (x);
5055       if (t)
5056         return fold_convert (type, build_fold_addr_expr (t));
5057       /* else fall through.  */
5058
5059     default:
5060       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5061
5062       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5063          address mode to pointer mode.  */
5064       if (POINTER_TYPE_P (type))
5065         x = convert_memory_address_addr_space
5066               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5067
5068       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5069          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5070       t->decl_with_rtl.rtl = x;
5071
5072       return t;
5073     }
5074 }
5075 \f
5076 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5077    and returning TARGET.
5078
5079    If TARGET is 0, a pseudo-register or constant is returned.  */
5080
5081 rtx
5082 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5083 {
5084   rtx tem = 0;
5085
5086   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5087     tem = simplify_binary_operation (AND, mode, op0, op1);
5088   if (tem == 0)
5089     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5090
5091   if (target == 0)
5092     target = tem;
5093   else if (tem != target)
5094     emit_move_insn (target, tem);
5095   return target;
5096 }
5097
5098 /* Helper function for emit_store_flag.  */
5099 static rtx
5100 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5101              machine_mode mode, machine_mode compare_mode,
5102              int unsignedp, rtx x, rtx y, int normalizep,
5103              machine_mode target_mode)
5104 {
5105   struct expand_operand ops[4];
5106   rtx op0, comparison, subtarget;
5107   rtx_insn *last;
5108   machine_mode result_mode = targetm.cstore_mode (icode);
5109
5110   last = get_last_insn ();
5111   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5112   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5113   if (!x || !y)
5114     {
5115       delete_insns_since (last);
5116       return NULL_RTX;
5117     }
5118
5119   if (target_mode == VOIDmode)
5120     target_mode = result_mode;
5121   if (!target)
5122     target = gen_reg_rtx (target_mode);
5123
5124   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5125
5126   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5127   create_fixed_operand (&ops[1], comparison);
5128   create_fixed_operand (&ops[2], x);
5129   create_fixed_operand (&ops[3], y);
5130   if (!maybe_expand_insn (icode, 4, ops))
5131     {
5132       delete_insns_since (last);
5133       return NULL_RTX;
5134     }
5135   subtarget = ops[0].value;
5136
5137   /* If we are converting to a wider mode, first convert to
5138      TARGET_MODE, then normalize.  This produces better combining
5139      opportunities on machines that have a SIGN_EXTRACT when we are
5140      testing a single bit.  This mostly benefits the 68k.
5141
5142      If STORE_FLAG_VALUE does not have the sign bit set when
5143      interpreted in MODE, we can do this conversion as unsigned, which
5144      is usually more efficient.  */
5145   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5146     {
5147       convert_move (target, subtarget,
5148                     val_signbit_known_clear_p (result_mode,
5149                                                STORE_FLAG_VALUE));
5150       op0 = target;
5151       result_mode = target_mode;
5152     }
5153   else
5154     op0 = subtarget;
5155
5156   /* If we want to keep subexpressions around, don't reuse our last
5157      target.  */
5158   if (optimize)
5159     subtarget = 0;
5160
5161   /* Now normalize to the proper value in MODE.  Sometimes we don't
5162      have to do anything.  */
5163   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5164     ;
5165   /* STORE_FLAG_VALUE might be the most negative number, so write
5166      the comparison this way to avoid a compiler-time warning.  */
5167   else if (- normalizep == STORE_FLAG_VALUE)
5168     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5169
5170   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5171      it hard to use a value of just the sign bit due to ANSI integer
5172      constant typing rules.  */
5173   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5174     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5175                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5176                         normalizep == 1);
5177   else
5178     {
5179       gcc_assert (STORE_FLAG_VALUE & 1);
5180
5181       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5182       if (normalizep == -1)
5183         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5184     }
5185
5186   /* If we were converting to a smaller mode, do the conversion now.  */
5187   if (target_mode != result_mode)
5188     {
5189       convert_move (target, op0, 0);
5190       return target;
5191     }
5192   else
5193     return op0;
5194 }
5195
5196
5197 /* A subroutine of emit_store_flag only including "tricks" that do not
5198    need a recursive call.  These are kept separate to avoid infinite
5199    loops.  */
5200
5201 static rtx
5202 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5203                    machine_mode mode, int unsignedp, int normalizep,
5204                    machine_mode target_mode)
5205 {
5206   rtx subtarget;
5207   enum insn_code icode;
5208   machine_mode compare_mode;
5209   enum mode_class mclass;
5210   enum rtx_code scode;
5211   rtx tem;
5212
5213   if (unsignedp)
5214     code = unsigned_condition (code);
5215   scode = swap_condition (code);
5216
5217   /* If one operand is constant, make it the second one.  Only do this
5218      if the other operand is not constant as well.  */
5219
5220   if (swap_commutative_operands_p (op0, op1))
5221     {
5222       tem = op0;
5223       op0 = op1;
5224       op1 = tem;
5225       code = swap_condition (code);
5226     }
5227
5228   if (mode == VOIDmode)
5229     mode = GET_MODE (op0);
5230
5231   /* For some comparisons with 1 and -1, we can convert this to
5232      comparisons with zero.  This will often produce more opportunities for
5233      store-flag insns.  */
5234
5235   switch (code)
5236     {
5237     case LT:
5238       if (op1 == const1_rtx)
5239         op1 = const0_rtx, code = LE;
5240       break;
5241     case LE:
5242       if (op1 == constm1_rtx)
5243         op1 = const0_rtx, code = LT;
5244       break;
5245     case GE:
5246       if (op1 == const1_rtx)
5247         op1 = const0_rtx, code = GT;
5248       break;
5249     case GT:
5250       if (op1 == constm1_rtx)
5251         op1 = const0_rtx, code = GE;
5252       break;
5253     case GEU:
5254       if (op1 == const1_rtx)
5255         op1 = const0_rtx, code = NE;
5256       break;
5257     case LTU:
5258       if (op1 == const1_rtx)
5259         op1 = const0_rtx, code = EQ;
5260       break;
5261     default:
5262       break;
5263     }
5264
5265   /* If we are comparing a double-word integer with zero or -1, we can
5266      convert the comparison into one involving a single word.  */
5267   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5268       && GET_MODE_CLASS (mode) == MODE_INT
5269       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5270     {
5271       if ((code == EQ || code == NE)
5272           && (op1 == const0_rtx || op1 == constm1_rtx))
5273         {
5274           rtx op00, op01;
5275
5276           /* Do a logical OR or AND of the two words and compare the
5277              result.  */
5278           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5279           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5280           tem = expand_binop (word_mode,
5281                               op1 == const0_rtx ? ior_optab : and_optab,
5282                               op00, op01, NULL_RTX, unsignedp,
5283                               OPTAB_DIRECT);
5284
5285           if (tem != 0)
5286             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5287                                    unsignedp, normalizep);
5288         }
5289       else if ((code == LT || code == GE) && op1 == const0_rtx)
5290         {
5291           rtx op0h;
5292
5293           /* If testing the sign bit, can just test on high word.  */
5294           op0h = simplify_gen_subreg (word_mode, op0, mode,
5295                                       subreg_highpart_offset (word_mode,
5296                                                               mode));
5297           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5298                                  unsignedp, normalizep);
5299         }
5300       else
5301         tem = NULL_RTX;
5302
5303       if (tem)
5304         {
5305           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5306             return tem;
5307           if (!target)
5308             target = gen_reg_rtx (target_mode);
5309
5310           convert_move (target, tem,
5311                         !val_signbit_known_set_p (word_mode,
5312                                                   (normalizep ? normalizep
5313                                                    : STORE_FLAG_VALUE)));
5314           return target;
5315         }
5316     }
5317
5318   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5319      complement of A (for GE) and shifting the sign bit to the low bit.  */
5320   if (op1 == const0_rtx && (code == LT || code == GE)
5321       && GET_MODE_CLASS (mode) == MODE_INT
5322       && (normalizep || STORE_FLAG_VALUE == 1
5323           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5324     {
5325       subtarget = target;
5326
5327       if (!target)
5328         target_mode = mode;
5329
5330       /* If the result is to be wider than OP0, it is best to convert it
5331          first.  If it is to be narrower, it is *incorrect* to convert it
5332          first.  */
5333       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5334         {
5335           op0 = convert_modes (target_mode, mode, op0, 0);
5336           mode = target_mode;
5337         }
5338
5339       if (target_mode != mode)
5340         subtarget = 0;
5341
5342       if (code == GE)
5343         op0 = expand_unop (mode, one_cmpl_optab, op0,
5344                            ((STORE_FLAG_VALUE == 1 || normalizep)
5345                             ? 0 : subtarget), 0);
5346
5347       if (STORE_FLAG_VALUE == 1 || normalizep)
5348         /* If we are supposed to produce a 0/1 value, we want to do
5349            a logical shift from the sign bit to the low-order bit; for
5350            a -1/0 value, we do an arithmetic shift.  */
5351         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5352                             GET_MODE_BITSIZE (mode) - 1,
5353                             subtarget, normalizep != -1);
5354
5355       if (mode != target_mode)
5356         op0 = convert_modes (target_mode, mode, op0, 0);
5357
5358       return op0;
5359     }
5360
5361   mclass = GET_MODE_CLASS (mode);
5362   for (compare_mode = mode; compare_mode != VOIDmode;
5363        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5364     {
5365      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5366      icode = optab_handler (cstore_optab, optab_mode);
5367      if (icode != CODE_FOR_nothing)
5368         {
5369           do_pending_stack_adjust ();
5370           tem = emit_cstore (target, icode, code, mode, compare_mode,
5371                              unsignedp, op0, op1, normalizep, target_mode);
5372           if (tem)
5373             return tem;
5374
5375           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5376             {
5377               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5378                                  unsignedp, op1, op0, normalizep, target_mode);
5379               if (tem)
5380                 return tem;
5381             }
5382           break;
5383         }
5384     }
5385
5386   return 0;
5387 }
5388
5389 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5390    and storing in TARGET.  Normally return TARGET.
5391    Return 0 if that cannot be done.
5392
5393    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5394    it is VOIDmode, they cannot both be CONST_INT.
5395
5396    UNSIGNEDP is for the case where we have to widen the operands
5397    to perform the operation.  It says to use zero-extension.
5398
5399    NORMALIZEP is 1 if we should convert the result to be either zero
5400    or one.  Normalize is -1 if we should convert the result to be
5401    either zero or -1.  If NORMALIZEP is zero, the result will be left
5402    "raw" out of the scc insn.  */
5403
5404 rtx
5405 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5406                  machine_mode mode, int unsignedp, int normalizep)
5407 {
5408   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5409   enum rtx_code rcode;
5410   rtx subtarget;
5411   rtx tem, trueval;
5412   rtx_insn *last;
5413
5414   /* If we compare constants, we shouldn't use a store-flag operation,
5415      but a constant load.  We can get there via the vanilla route that
5416      usually generates a compare-branch sequence, but will in this case
5417      fold the comparison to a constant, and thus elide the branch.  */
5418   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5419     return NULL_RTX;
5420
5421   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5422                            target_mode);
5423   if (tem)
5424     return tem;
5425
5426   /* If we reached here, we can't do this with a scc insn, however there
5427      are some comparisons that can be done in other ways.  Don't do any
5428      of these cases if branches are very cheap.  */
5429   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5430     return 0;
5431
5432   /* See what we need to return.  We can only return a 1, -1, or the
5433      sign bit.  */
5434
5435   if (normalizep == 0)
5436     {
5437       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5438         normalizep = STORE_FLAG_VALUE;
5439
5440       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5441         ;
5442       else
5443         return 0;
5444     }
5445
5446   last = get_last_insn ();
5447
5448   /* If optimizing, use different pseudo registers for each insn, instead
5449      of reusing the same pseudo.  This leads to better CSE, but slows
5450      down the compiler, since there are more pseudos */
5451   subtarget = (!optimize
5452                && (target_mode == mode)) ? target : NULL_RTX;
5453   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5454
5455   /* For floating-point comparisons, try the reverse comparison or try
5456      changing the "orderedness" of the comparison.  */
5457   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5458     {
5459       enum rtx_code first_code;
5460       bool and_them;
5461
5462       rcode = reverse_condition_maybe_unordered (code);
5463       if (can_compare_p (rcode, mode, ccp_store_flag)
5464           && (code == ORDERED || code == UNORDERED
5465               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5466               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5467         {
5468           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5469                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5470
5471           /* For the reverse comparison, use either an addition or a XOR.  */
5472           if (want_add
5473               && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5474                            optimize_insn_for_speed_p ()) == 0)
5475             {
5476               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5477                                        STORE_FLAG_VALUE, target_mode);
5478               if (tem)
5479                 return expand_binop (target_mode, add_optab, tem,
5480                                      gen_int_mode (normalizep, target_mode),
5481                                      target, 0, OPTAB_WIDEN);
5482             }
5483           else if (!want_add
5484                    && rtx_cost (trueval, XOR, 1,
5485                                 optimize_insn_for_speed_p ()) == 0)
5486             {
5487               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5488                                        normalizep, target_mode);
5489               if (tem)
5490                 return expand_binop (target_mode, xor_optab, tem, trueval,
5491                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5492             }
5493         }
5494
5495       delete_insns_since (last);
5496
5497       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5498       if (code == ORDERED || code == UNORDERED)
5499         return 0;
5500
5501       and_them = split_comparison (code, mode, &first_code, &code);
5502
5503       /* If there are no NaNs, the first comparison should always fall through.
5504          Effectively change the comparison to the other one.  */
5505       if (!HONOR_NANS (mode))
5506         {
5507           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5508           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5509                                     target_mode);
5510         }
5511
5512 #ifdef HAVE_conditional_move
5513       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5514          conditional move.  */
5515       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5516                                normalizep, target_mode);
5517       if (tem == 0)
5518         return 0;
5519
5520       if (and_them)
5521         tem = emit_conditional_move (target, code, op0, op1, mode,
5522                                      tem, const0_rtx, GET_MODE (tem), 0);
5523       else
5524         tem = emit_conditional_move (target, code, op0, op1, mode,
5525                                      trueval, tem, GET_MODE (tem), 0);
5526
5527       if (tem == 0)
5528         delete_insns_since (last);
5529       return tem;
5530 #else
5531       return 0;
5532 #endif
5533     }
5534
5535   /* The remaining tricks only apply to integer comparisons.  */
5536
5537   if (GET_MODE_CLASS (mode) != MODE_INT)
5538     return 0;
5539
5540   /* If this is an equality comparison of integers, we can try to exclusive-or
5541      (or subtract) the two operands and use a recursive call to try the
5542      comparison with zero.  Don't do any of these cases if branches are
5543      very cheap.  */
5544
5545   if ((code == EQ || code == NE) && op1 != const0_rtx)
5546     {
5547       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5548                           OPTAB_WIDEN);
5549
5550       if (tem == 0)
5551         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5552                             OPTAB_WIDEN);
5553       if (tem != 0)
5554         tem = emit_store_flag (target, code, tem, const0_rtx,
5555                                mode, unsignedp, normalizep);
5556       if (tem != 0)
5557         return tem;
5558
5559       delete_insns_since (last);
5560     }
5561
5562   /* For integer comparisons, try the reverse comparison.  However, for
5563      small X and if we'd have anyway to extend, implementing "X != 0"
5564      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5565   rcode = reverse_condition (code);
5566   if (can_compare_p (rcode, mode, ccp_store_flag)
5567       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5568             && code == NE
5569             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5570             && op1 == const0_rtx))
5571     {
5572       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5573                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5574
5575       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5576       if (want_add
5577           && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5578                        optimize_insn_for_speed_p ()) == 0)
5579         {
5580           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5581                                    STORE_FLAG_VALUE, target_mode);
5582           if (tem != 0)
5583             tem = expand_binop (target_mode, add_optab, tem,
5584                                 gen_int_mode (normalizep, target_mode),
5585                                 target, 0, OPTAB_WIDEN);
5586         }
5587       else if (!want_add
5588                && rtx_cost (trueval, XOR, 1,
5589                             optimize_insn_for_speed_p ()) == 0)
5590         {
5591           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5592                                    normalizep, target_mode);
5593           if (tem != 0)
5594             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5595                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5596         }
5597
5598       if (tem != 0)
5599         return tem;
5600       delete_insns_since (last);
5601     }
5602
5603   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5604      the constant zero.  Reject all other comparisons at this point.  Only
5605      do LE and GT if branches are expensive since they are expensive on
5606      2-operand machines.  */
5607
5608   if (op1 != const0_rtx
5609       || (code != EQ && code != NE
5610           && (BRANCH_COST (optimize_insn_for_speed_p (),
5611                            false) <= 1 || (code != LE && code != GT))))
5612     return 0;
5613
5614   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5615      do the necessary operation below.  */
5616
5617   tem = 0;
5618
5619   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5620      the sign bit set.  */
5621
5622   if (code == LE)
5623     {
5624       /* This is destructive, so SUBTARGET can't be OP0.  */
5625       if (rtx_equal_p (subtarget, op0))
5626         subtarget = 0;
5627
5628       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5629                           OPTAB_WIDEN);
5630       if (tem)
5631         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5632                             OPTAB_WIDEN);
5633     }
5634
5635   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5636      number of bits in the mode of OP0, minus one.  */
5637
5638   if (code == GT)
5639     {
5640       if (rtx_equal_p (subtarget, op0))
5641         subtarget = 0;
5642
5643       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5644                           GET_MODE_BITSIZE (mode) - 1,
5645                           subtarget, 0);
5646       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5647                           OPTAB_WIDEN);
5648     }
5649
5650   if (code == EQ || code == NE)
5651     {
5652       /* For EQ or NE, one way to do the comparison is to apply an operation
5653          that converts the operand into a positive number if it is nonzero
5654          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5655          for NE we negate.  This puts the result in the sign bit.  Then we
5656          normalize with a shift, if needed.
5657
5658          Two operations that can do the above actions are ABS and FFS, so try
5659          them.  If that doesn't work, and MODE is smaller than a full word,
5660          we can use zero-extension to the wider mode (an unsigned conversion)
5661          as the operation.  */
5662
5663       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5664          that is compensated by the subsequent overflow when subtracting
5665          one / negating.  */
5666
5667       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5668         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5669       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5670         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5671       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5672         {
5673           tem = convert_modes (word_mode, mode, op0, 1);
5674           mode = word_mode;
5675         }
5676
5677       if (tem != 0)
5678         {
5679           if (code == EQ)
5680             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5681                                 0, OPTAB_WIDEN);
5682           else
5683             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5684         }
5685
5686       /* If we couldn't do it that way, for NE we can "or" the two's complement
5687          of the value with itself.  For EQ, we take the one's complement of
5688          that "or", which is an extra insn, so we only handle EQ if branches
5689          are expensive.  */
5690
5691       if (tem == 0
5692           && (code == NE
5693               || BRANCH_COST (optimize_insn_for_speed_p (),
5694                               false) > 1))
5695         {
5696           if (rtx_equal_p (subtarget, op0))
5697             subtarget = 0;
5698
5699           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5700           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5701                               OPTAB_WIDEN);
5702
5703           if (tem && code == EQ)
5704             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5705         }
5706     }
5707
5708   if (tem && normalizep)
5709     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5710                         GET_MODE_BITSIZE (mode) - 1,
5711                         subtarget, normalizep == 1);
5712
5713   if (tem)
5714     {
5715       if (!target)
5716         ;
5717       else if (GET_MODE (tem) != target_mode)
5718         {
5719           convert_move (target, tem, 0);
5720           tem = target;
5721         }
5722       else if (!subtarget)
5723         {
5724           emit_move_insn (target, tem);
5725           tem = target;
5726         }
5727     }
5728   else
5729     delete_insns_since (last);
5730
5731   return tem;
5732 }
5733
5734 /* Like emit_store_flag, but always succeeds.  */
5735
5736 rtx
5737 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5738                        machine_mode mode, int unsignedp, int normalizep)
5739 {
5740   rtx tem;
5741   rtx_code_label *label;
5742   rtx trueval, falseval;
5743
5744   /* First see if emit_store_flag can do the job.  */
5745   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5746   if (tem != 0)
5747     return tem;
5748
5749   if (!target)
5750     target = gen_reg_rtx (word_mode);
5751
5752   /* If this failed, we have to do this with set/compare/jump/set code.
5753      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5754   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5755   if (code == NE
5756       && GET_MODE_CLASS (mode) == MODE_INT
5757       && REG_P (target)
5758       && op0 == target
5759       && op1 == const0_rtx)
5760     {
5761       label = gen_label_rtx ();
5762       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5763                                mode, NULL_RTX, NULL_RTX, label, -1);
5764       emit_move_insn (target, trueval);
5765       emit_label (label);
5766       return target;
5767     }
5768
5769   if (!REG_P (target)
5770       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5771     target = gen_reg_rtx (GET_MODE (target));
5772
5773   /* Jump in the right direction if the target cannot implement CODE
5774      but can jump on its reverse condition.  */
5775   falseval = const0_rtx;
5776   if (! can_compare_p (code, mode, ccp_jump)
5777       && (! FLOAT_MODE_P (mode)
5778           || code == ORDERED || code == UNORDERED
5779           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5780           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5781     {
5782       enum rtx_code rcode;
5783       if (FLOAT_MODE_P (mode))
5784         rcode = reverse_condition_maybe_unordered (code);
5785       else
5786         rcode = reverse_condition (code);
5787
5788       /* Canonicalize to UNORDERED for the libcall.  */
5789       if (can_compare_p (rcode, mode, ccp_jump)
5790           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5791         {
5792           falseval = trueval;
5793           trueval = const0_rtx;
5794           code = rcode;
5795         }
5796     }
5797
5798   emit_move_insn (target, trueval);
5799   label = gen_label_rtx ();
5800   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5801                            NULL_RTX, label, -1);
5802
5803   emit_move_insn (target, falseval);
5804   emit_label (label);
5805
5806   return target;
5807 }
5808 \f
5809 /* Perform possibly multi-word comparison and conditional jump to LABEL
5810    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5811    now a thin wrapper around do_compare_rtx_and_jump.  */
5812
5813 static void
5814 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5815                  rtx_code_label *label)
5816 {
5817   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5818   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5819                            NULL_RTX, NULL_RTX, label, -1);
5820 }