gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2014 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "diagnostic-core.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "stor-layout.h"
  30 #include "tm_p.h"
  31 #include "flags.h"
  32 #include "insn-config.h"
  33 #include "expr.h"
  34 #include "insn-codes.h"
  35 #include "optabs.h"
  36 #include "recog.h"
  37 #include "langhooks.h"
  38 #include "predict.h"
  39 #include "basic-block.h"
  40 #include "df.h"
  41 #include "target.h"
  42 #include "expmed.h"
  43 /* Default target_expmed; this_target_expmed initially points to it.  */
  44 struct target_expmed default_target_expmed;
  45 #if SWITCHABLE_TARGET
  46 struct target_expmed *this_target_expmed = &default_target_expmed;
  47 #endif
  48 /* Forward declarations of file-local (static) helpers.  */
  49 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  50                                    unsigned HOST_WIDE_INT,
  51                                    unsigned HOST_WIDE_INT,
  52                                    unsigned HOST_WIDE_INT,
  53                                    rtx);
  54 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
  55                                      unsigned HOST_WIDE_INT,
  56                                      rtx);
  57 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  58                                    unsigned HOST_WIDE_INT,
  59                                    unsigned HOST_WIDE_INT,
  60                                    unsigned HOST_WIDE_INT,
  61                                    rtx);
  62 static rtx extract_fixed_bit_field (machine_mode, rtx,
  63                                     unsigned HOST_WIDE_INT,
  64                                     unsigned HOST_WIDE_INT, rtx, int);
  65 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
  66                                       unsigned HOST_WIDE_INT,
  67                                       unsigned HOST_WIDE_INT, rtx, int);
  68 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
  69 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  70                                     unsigned HOST_WIDE_INT, int);
  71 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
  72 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  73 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  74
  75 /* Return an integer constant of mode MODE whose value is a mask of
  76    BITSIZE ones followed by BITPOS zeros, or the complement of that mask
  77    if COMPLEMENT.  The mask is truncated or zero-extended as needed to
  78    fit the width of MODE.  */
  79
  80 static inline rtx
  81 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
  82 {
  83   unsigned int width = GET_MODE_PRECISION (mode);
  84   return immed_wide_int_const
  85     (wi::shifted_mask (bitpos, bitsize, complement, width), mode);
  86 }
  87
  88 /* Test whether X is zero or a power of two.  X is evaluated twice.  */
  89 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  90   (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
  91 /* Scratch rtx expressions that init_expmed builds and then costs.  */
  92 struct init_expmed_rtl
  93 {
  94   rtx reg;		/* Raw REG used as the operand of the others.  */
  95   rtx plus;		/* PLUS of reg and reg.  */
  96   rtx neg;		/* NEG of reg.  */
  97   rtx mult;		/* MULT of reg and reg.  */
  98   rtx sdiv;		/* DIV of reg by reg.  */
  99   rtx udiv;		/* UDIV of reg by reg.  */
 100   rtx sdiv_32;		/* DIV of reg by pow2[5].  */
 101   rtx smod_32;		/* MOD of reg by pow2[5].  */
 102   rtx wide_mult;	/* MULT of zext and zext.  */
 103   rtx wide_lshr;	/* LSHIFTRT of wide_mult; count patched per mode.  */
 104   rtx wide_trunc;	/* TRUNCATE of wide_lshr.  */
 105   rtx shift;		/* ASHIFT of reg; count patched from cint[].  */
 106   rtx shift_mult;	/* MULT of reg; multiplier patched from pow2[].  */
 107   rtx shift_add;	/* PLUS of shift_mult and reg.  */
 108   rtx shift_sub0;	/* MINUS of shift_mult and reg.  */
 109   rtx shift_sub1;	/* MINUS of reg and shift_mult.  */
 110   rtx zext;		/* ZERO_EXTEND of reg.  */
 111   rtx trunc;		/* TRUNCATE of reg.  */
 112   /* Constants 1 << M and M, filled in by init_expmed for M >= 1.  */
 113   rtx pow2[MAX_BITS_PER_WORD];
 114   rtx cint[MAX_BITS_PER_WORD];
 115 };
 116
 117 /* Record the cost of converting FROM_MODE to TO_MODE.  The cost is that
 118    of ALL's TRUNCATE or ZERO_EXTEND expression applied to a register of
 119    FROM_MODE; SPEED is passed through to the cost functions.  */
 120
 121 static void
 122 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
 123                       machine_mode from_mode, bool speed)
 124 {
 125   int dst_bits = GET_MODE_PRECISION (to_mode);
 126   int src_bits = GET_MODE_PRECISION (from_mode);
 127
 128   /* Most partial integers have a precision below that of the "full"
 129      integer they are stored in.  When one does not (its precision is a
 130      power of two), shave one bit off for the comparison below.  */
 131   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 132       && exact_log2 (dst_bits) != -1)
 133     dst_bits -= 1;
 134   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 135       && exact_log2 (src_bits) != -1)
 136     src_bits -= 1;
 137
 138   /* Narrowing is costed as a truncation and anything else as a zero
 139      extension; sign extension is assumed to cost the same.  */
 140   rtx op = dst_bits < src_bits ? all->trunc : all->zext;
 141   PUT_MODE (all->reg, from_mode);
 142   set_convert_cost (to_mode, from_mode, speed, set_src_cost (op, speed));
 143 }
 144
 145 /* Fill in the cost tables for MODE from the expressions in ALL, passing
 146    SPEED to every cost query.  */
 147
 148 static void
 149 init_expmed_one_mode (struct init_expmed_rtl *all,
 150                       machine_mode mode, int speed)
 151 {
 152   rtx retarget[] =
 153     {
 154       all->reg, all->plus, all->neg, all->mult, all->sdiv, all->udiv,
 155       all->sdiv_32, all->smod_32, all->wide_trunc, all->shift,
 156       all->shift_mult, all->shift_add, all->shift_sub0, all->shift_sub1,
 157       all->zext, all->trunc
 158     };
 159   const int unit_bits = GET_MODE_UNIT_BITSIZE (mode);
 160
 161   for (unsigned int i = 0; i < sizeof retarget / sizeof retarget[0]; i++)
 162     PUT_MODE (retarget[i], mode);
 163
 164   set_add_cost (speed, mode, set_src_cost (all->plus, speed));
 165   set_neg_cost (speed, mode, set_src_cost (all->neg, speed));
 166   set_mul_cost (speed, mode, set_src_cost (all->mult, speed));
 167   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, speed));
 168   set_udiv_cost (speed, mode, set_src_cost (all->udiv, speed));
 169
 170   /* Signed division and modulus by 32 count as cheap when they cost no
 171      more than two, respectively four, additions.  */
 172   const int add = add_cost (speed, mode);
 173   set_sdiv_pow2_cheap (speed, mode,
 174                        set_src_cost (all->sdiv_32, speed) <= 2 * add);
 175   set_smod_pow2_cheap (speed, mode,
 176                        set_src_cost (all->smod_32, speed) <= 4 * add);
 177
 178   /* A shift by zero is free; the shift-and-add/subtract forms with a
 179      zero count are given the cost of an addition.  */
 180   set_shift_cost (speed, mode, 0, 0);
 181   set_shiftadd_cost (speed, mode, 0, add);
 182   set_shiftsub0_cost (speed, mode, 0, add);
 183   set_shiftsub1_cost (speed, mode, 0, add);
 184
 185   /* Cost every other count by patching it into the shift expressions.  */
 186   const int limit = MIN (MAX_BITS_PER_WORD, unit_bits);
 187   for (int count = 1; count < limit; count++)
 188     {
 189       XEXP (all->shift, 1) = all->cint[count];
 190       XEXP (all->shift_mult, 1) = all->pow2[count];
 191       set_shift_cost (speed, mode, count, set_src_cost (all->shift, speed));
 192       set_shiftadd_cost (speed, mode, count,
 193                          set_src_cost (all->shift_add, speed));
 194       set_shiftsub0_cost (speed, mode, count,
 195                           set_src_cost (all->shift_sub0, speed));
 196       set_shiftsub1_cost (speed, mode, count,
 197                           set_src_cost (all->shift_sub1, speed));
 198     }
 199
 200   if (SCALAR_INT_MODE_P (mode))
 201     {
 202       machine_mode from = MIN_MODE_INT;
 203       while (from <= MAX_MODE_INT)
 204         {
 205           init_expmed_one_conv (all, mode, from, speed);
 206           from = (machine_mode) (from + 1);
 207         }
 208     }
 209
 210   if (GET_MODE_CLASS (mode) != MODE_INT)
 211     return;
 212   machine_mode wider = GET_MODE_WIDER_MODE (mode);
 213   if (wider == VOIDmode)
 214     return;
 215
 216   /* Cost a multiply in the wider mode, and a highpart multiply expressed
 217      as the truncated, right-shifted wide product.  */
 218   PUT_MODE (all->zext, wider);
 219   PUT_MODE (all->wide_mult, wider);
 220   PUT_MODE (all->wide_lshr, wider);
 221   XEXP (all->wide_lshr, 1) = GEN_INT (unit_bits);
 222   set_mul_widen_cost (speed, wider, set_src_cost (all->wide_mult, speed));
 223   set_mul_highpart_cost (speed, mode, set_src_cost (all->wide_trunc, speed));
 224 }
 225
 226 /* Set up the cost tables used by this file.  Build one set of scratch
 227    expressions, cost them in every integer, partial-integer and
 228    integer-vector mode for both settings of SPEED, reset the algorithm
 229    hash, and release the scratch expressions again.  */
 230
 231 void
 232 init_expmed (void)
 233 {
 234   struct init_expmed_rtl all;
 235   machine_mode mode = QImode;
 236
 237   memset (&all, 0, sizeof all);
 238   for (int bit = 1; bit < MAX_BITS_PER_WORD; bit++)
 239     {
 240       all.pow2[bit] = GEN_INT ((HOST_WIDE_INT) 1 << bit);
 241       all.cint[bit] = GEN_INT (bit);
 242     }
 243
 244   /* Avoid using hard regs in ways which may be unsupported.  */
 245   all.reg = gen_rtx_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 246   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 247   all.neg = gen_rtx_NEG (mode, all.reg);
 248   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 249   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 250   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 251   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 252   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 253   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 254   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 255   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 256   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 257   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 258   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 259   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 260   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 261   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 262   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 263
 264   /* Cost every integer, partial-integer and integer-vector mode, first
 265      with SPEED clear and then with it set.  */
 266   for (int speed = 0; speed < 2; speed++)
 267     {
 268       crtl->maybe_hot_insn_p = speed;
 269       set_zero_cost (speed, set_src_cost (const0_rtx, speed));
 270
 271       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 272            mode = (machine_mode) (mode + 1))
 273         init_expmed_one_mode (&all, mode, speed);
 274
 275       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 276         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 277              mode = (machine_mode) (mode + 1))
 278           init_expmed_one_mode (&all, mode, speed);
 279
 280       if (MIN_MODE_VECTOR_INT != VOIDmode)
 281         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 282              mode = (machine_mode) (mode + 1))
 283           init_expmed_one_mode (&all, mode, speed);
 284     }
 285
 286   /* Wipe the algorithm hash if it has been used before; otherwise just
 287      mark it as in use.  */
 288   if (!alg_hash_used_p ())
 289     set_alg_hash_used_p (true);
 290   else
 291     {
 292       struct alg_hash_entry *entries = alg_hash_entry_ptr (0);
 293       memset (entries, 0, sizeof (*entries) * NUM_ALG_HASH_ENTRIES);
 294     }
 295   default_rtl_profile ();
 296
 297   /* Free the scratch expressions in reverse order of creation.  */
 298   rtx scratch[] =
 299     {
 300       all.trunc, all.shift_sub1, all.shift_sub0, all.shift_add,
 301       all.shift_mult, all.shift, all.wide_trunc, all.wide_lshr,
 302       all.wide_mult, all.zext, all.smod_32, all.sdiv_32, all.udiv,
 303       all.sdiv, all.mult, all.neg, all.plus, all.reg
 304     };
 305   for (unsigned int i = 0; i < sizeof scratch / sizeof scratch[0]; i++)
 306     ggc_free (scratch[i]);
 307 }
 308
 309 /* Return an rtx representing the negation of X.  MODE is the intended
 310    mode of the result; passing it explicitly is useful if X is a
 311    CONST_INT.  */
 312
 313 rtx
 314 negate_rtx (machine_mode mode, rtx x)
 315 {
 316   /* Fold the negation if possible; otherwise expand a negate.  */
 317   rtx negated = simplify_unary_operation (NEG, mode, x, mode);
 318   if (negated != 0)
 319     return negated;
 320
 321   return expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 322 }
 323
 324 /* Adjust bitfield memory MEM so that it refers to the first unit of mode
 325    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
 326    If MODE is BLKmode, the result references every byte of the bitfield.
 327    Store in *NEW_BITNUM the field's bit position within the new memory.  */
 328
 329 static rtx
 330 narrow_bit_field_mem (rtx mem, machine_mode mode,
 331                       unsigned HOST_WIDE_INT bitsize,
 332                       unsigned HOST_WIDE_INT bitnum,
 333                       unsigned HOST_WIDE_INT *new_bitnum)
 334 {
 335   if (mode != BLKmode)
 336     {
 337       unsigned int unit = GET_MODE_BITSIZE (mode);
 338       unsigned HOST_WIDE_INT within_unit = bitnum % unit;
 339       HOST_WIDE_INT offset = (bitnum - within_unit) / BITS_PER_UNIT;
 340       *new_bitnum = within_unit;
 341       return adjust_bitfield_address (mem, mode, offset);
 342     }
 343   /* BLKmode: begin at the field's first byte and cover its last one.  */
 344   unsigned HOST_WIDE_INT within_byte = bitnum % BITS_PER_UNIT;
 345   HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 346   HOST_WIDE_INT size = ((within_byte + bitsize + BITS_PER_UNIT - 1)
 347                         / BITS_PER_UNIT);
 348   *new_bitnum = within_byte;
 349   return adjust_bitfield_address_size (mem, mode, offset, size);
 350 }
 351
 352 /* The caller wants to perform insertion or extraction PATTERN on a
 353    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 354    BITREGION_START and BITREGION_END are as for store_bit_field and
 355    FIELDMODE is the natural mode of the field.
 356
 357    Look for a mode that satisfies the memory access restrictions and,
 358    where applicable, suits a register insertion or extraction.  On
 359    success return the new memory and store the adjusted bit position in
 360    *NEW_BITNUM; otherwise return null.  */
 361
 362 static rtx
 363 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 364                               rtx op0, HOST_WIDE_INT bitsize,
 365                               HOST_WIDE_INT bitnum,
 366                               unsigned HOST_WIDE_INT bitregion_start,
 367                               unsigned HOST_WIDE_INT bitregion_end,
 368                               machine_mode fieldmode,
 369                               unsigned HOST_WIDE_INT *new_bitnum)
 370 {
 371   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 372                                 bitregion_end, MEM_ALIGN (op0),
 373                                 MEM_VOLATILE_P (op0));
 374   machine_mode best_mode;
 375   if (!iter.next_mode (&best_mode))
 376     return NULL_RTX;
 377
 378   /* BEST_MODE is usable.  Unless smaller modes are preferred, keep going
 379      to the widest usable mode, since wider accesses tend to expose more
 380      CSE opportunities.  */
 381   if (!iter.prefer_smaller_modes ())
 382     {
 383       /* Do not go beyond the mode required by the corresponding register
 384          insertion or extraction instruction, if there is one, or beyond
 385          word_mode otherwise.  */
 386       extraction_insn insn;
 387       machine_mode limit_mode = word_mode;
 388       if (get_best_reg_extraction_insn (&insn, pattern,
 389                                         GET_MODE_BITSIZE (best_mode),
 390                                         fieldmode))
 391         limit_mode = insn.field_mode;
 392
 393       for (machine_mode wider_mode; iter.next_mode (&wider_mode);)
 394         {
 395           if (GET_MODE_SIZE (wider_mode) > GET_MODE_SIZE (limit_mode))
 396             break;
 397           best_mode = wider_mode;
 398         }
 399     }
 400   return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, new_bitnum);
 401 }
 402
 403 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 404    a structure of mode STRUCT_MODE represents a lowpart subreg.  The
 405    subreg offset is then BITNUM / BITS_PER_UNIT.  */
 406
 407 static bool
 408 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
 409                      unsigned HOST_WIDE_INT bitsize,
 410                      machine_mode struct_mode)
 411 {
 412   if (!BYTES_BIG_ENDIAN)
 413     return bitnum % BITS_PER_WORD == 0;
 414   unsigned HOST_WIDE_INT end = bitnum + bitsize;
 415   return (bitnum % BITS_PER_UNIT == 0
 416           && (end == GET_MODE_BITSIZE (struct_mode)
 417               || end % BITS_PER_WORD == 0));
 418 }
 419
 420 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 421    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 422    Return false if the access would touch memory outside the range
 423    BITREGION_START to BITREGION_END for conformance to the C++ memory
 424    model.  */
 425
 426 static bool
 427 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 428                             unsigned HOST_WIDE_INT bitnum,
 429                             machine_mode fieldmode,
 430                             unsigned HOST_WIDE_INT bitregion_start,
 431                             unsigned HOST_WIDE_INT bitregion_end)
 432 {
 433   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 434
 435   /* Only a volatile MEM with -fstrict-volatile-bitfields qualifies.  */
 436   if (!(MEM_P (op0)
 437         && MEM_VOLATILE_P (op0)
 438         && flag_strict_volatile_bitfields > 0))
 439     return false;
 440
 441   /* Non-integral modes likely only happen with packed structures; punt.  */
 442   if (!SCALAR_INT_MODE_P (fieldmode))
 443     return false;
 444
 445   /* The field must fit in its mode, and the mode in a word.  */
 446   if (bitsize > modesize || modesize > BITS_PER_WORD)
 447     return false;
 448
 449   /* Unaligned fields that would have to be split are not handled.  */
 450   if (bitnum % BITS_PER_UNIT + bitsize > modesize)
 451     return false;
 452   if (STRICT_ALIGNMENT
 453       && bitnum % GET_MODE_ALIGNMENT (fieldmode) + bitsize > modesize)
 454     return false;
 455
 456   /* C++ memory model: the unit holding the field must be in the region.  */
 457   if (bitregion_end != 0)
 458     {
 459       unsigned HOST_WIDE_INT unit_start = bitnum - bitnum % modesize;
 460       if (unit_start < bitregion_start
 461           || unit_start + modesize - 1 > bitregion_end)
 462         return false;
 463     }
 464   return true;
 465 }
 466
 467 /* Return true if OP0 is a memory and if a bitfield of size BITSIZE at
 468    bit number BITNUM can be treated as a simple value of mode MODE.  */
 469
 470 static bool
 471 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 472                        unsigned HOST_WIDE_INT bitnum, machine_mode mode)
 473 {
 474   if (!MEM_P (op0) || bitnum % BITS_PER_UNIT != 0
 475       || bitsize != GET_MODE_BITSIZE (mode))
 476     return false;
 477   return (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
 478           || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
 479               && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode)));
 480 }
 481 \f
 482 /* Try to use instruction INSV to store VALUE into a field of OP0.  BITSIZE
 483    and BITNUM are as for store_bit_field.  Return true on success.  */
 484
 485 static bool
 486 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 487                             unsigned HOST_WIDE_INT bitsize,
 488                             unsigned HOST_WIDE_INT bitnum,
 489                             rtx value)
 490 {
 491   struct expand_operand ops[4];
 492   rtx value1;
 493   rtx xop0 = op0;
 494   rtx_insn *last = get_last_insn ();
 495   bool copy_back = false;
 496   /* Give up on an empty field or one wider than INSV's field mode.  */
 497   machine_mode op_mode = insv->field_mode;
 498   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 499   if (bitsize == 0 || bitsize > unit)
 500     return false;
 501
 502   if (MEM_P (xop0))
 503     /* Get a reference to the first byte of the field.  */
 504     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 505                                  &bitnum);
 506   else
 507     {
 508       /* Convert from counting within OP0 to counting in OP_MODE.  */
 509       if (BYTES_BIG_ENDIAN)
 510         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 511
 512       /* If xop0 is a register, we need it in OP_MODE
 513          to make it acceptable to the format of insv.  */
 514       if (GET_CODE (xop0) == SUBREG)
 515         /* We can't just change the mode, because this might clobber op0,
 516            and we will need the original value of op0 if insv fails.  */
 517         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 518       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 519         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 520     }
 521
 522   /* If the destination is a paradoxical subreg such that we need a
 523      truncate to the inner mode, perform the insertion on a temporary and
 524      truncate the result to the original destination.  Note that we can't
 525      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 526      X) 0)) is (reg:N X).  */
 527   if (GET_CODE (xop0) == SUBREG
 528       && REG_P (SUBREG_REG (xop0))
 529       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 530                                          op_mode))
 531     {
 532       rtx tem = gen_reg_rtx (op_mode);
 533       emit_move_insn (tem, xop0);
 534       xop0 = tem;
 535       copy_back = true;
 536     }
 537
 538   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 539      "backwards" from the size of the unit we are inserting into.
 540      Otherwise, we count bits from the most significant on a
 541      BYTES/BITS_BIG_ENDIAN machine.  */
 542
 543   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 544     bitnum = unit - bitsize - bitnum;
 545
 546   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 547   value1 = value;
 548   if (GET_MODE (value) != op_mode)
 549     {
 550       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 551         {
 552           /* Optimization: Don't bother really extending VALUE
 553              if it has all the bits we will actually use.  However,
 554              if we must narrow it, be sure we do it correctly.  */
 555
 556           if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 557             {
 558               rtx tmp;
 559
 560               tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 561               if (! tmp)
 562                 tmp = simplify_gen_subreg (op_mode,
 563                                            force_reg (GET_MODE (value),
 564                                                       value1),
 565                                            GET_MODE (value), 0);
 566               value1 = tmp;
 567             }
 568           else
 569             value1 = gen_lowpart (op_mode, value1);
 570         }
 571       else if (CONST_INT_P (value))
 572         value1 = gen_int_mode (INTVAL (value), op_mode);
 573       else
 574         /* The parse phase is supposed to make VALUE's data type
 575            match that of the component reference, which is a type
 576            at least as wide as the field; so VALUE should have
 577            a mode that corresponds to that type.  */
 578         gcc_assert (CONSTANT_P (value));
 579     }
 580   /* Operands: destination, field size, bit position, value to store.  */
 581   create_fixed_operand (&ops[0], xop0);
 582   create_integer_operand (&ops[1], bitsize);
 583   create_integer_operand (&ops[2], bitnum);
 584   create_input_operand (&ops[3], value1, op_mode);
 585   if (maybe_expand_insn (insv->icode, 4, ops))
 586     {
 587       if (copy_back)
 588         convert_move (op0, xop0, true);
 589       return true;
 590     }
 591   delete_insns_since (last);  /* Undo whatever this attempt emitted.  */
 592   return false;
 593 }
 594
 595 /* A subroutine of store_bit_field, with the same arguments.  Return true
 596    if the operation could be implemented.
 597
 598    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 599    no other way of implementing the operation.  If FALLBACK_P is false,
 600    return false instead.  */
 601
 602 static bool
 603 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 604                    unsigned HOST_WIDE_INT bitnum,
 605                    unsigned HOST_WIDE_INT bitregion_start,
 606                    unsigned HOST_WIDE_INT bitregion_end,
 607                    machine_mode fieldmode,
 608                    rtx value, bool fallback_p)
 609 {
 610   rtx op0 = str_rtx;
 611   rtx orig_value;
 612
 613   while (GET_CODE (op0) == SUBREG)
 614     {
 615       /* The following line once was done only if WORDS_BIG_ENDIAN,
 616          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 617          meaningful at a much higher level; when structures are copied
 618          between memory and regs, the higher-numbered regs
 619          always get higher addresses.  */
 620       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 621       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 622       int byte_offset = 0;
 623
 624       /* Paradoxical subregs need special handling on big endian machines.  */
 625       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 626         {
 627           int difference = inner_mode_size - outer_mode_size;
 628
 629           if (WORDS_BIG_ENDIAN)
 630             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 631           if (BYTES_BIG_ENDIAN)
 632             byte_offset += difference % UNITS_PER_WORD;
 633         }
 634       else
 635         byte_offset = SUBREG_BYTE (op0);
 636
 637       bitnum += byte_offset * BITS_PER_UNIT;
 638       op0 = SUBREG_REG (op0);
 639     }
 640
 641   /* No action is needed if the target is a register and if the field
 642      lies completely outside that register.  This can occur if the source
 643      code contains an out-of-bounds access to a small array.  */
 644   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 645     return true;
 646
 647   /* Use vec_set patterns for inserting parts of vectors whenever
 648      available.  */
 649   if (VECTOR_MODE_P (GET_MODE (op0))
 650       && !MEM_P (op0)
 651       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 652       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 653       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 654       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 655     {
 656       struct expand_operand ops[3];
 657       machine_mode outermode = GET_MODE (op0);
 658       machine_mode innermode = GET_MODE_INNER (outermode);
 659       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 660       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 661
 662       create_fixed_operand (&ops[0], op0);
 663       create_input_operand (&ops[1], value, innermode);
 664       create_integer_operand (&ops[2], pos);
 665       if (maybe_expand_insn (icode, 3, ops))
 666         return true;
 667     }
 668
 669   /* If the target is a register, overwriting the entire object, or storing
 670      a full-word or multi-word field can be done with just a SUBREG.  */
 671   if (!MEM_P (op0)
 672       && bitsize == GET_MODE_BITSIZE (fieldmode)
 673       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 674           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 675     {
 676       /* Use the subreg machinery either to narrow OP0 to the required
 677          words or to cope with mode punning between equal-sized modes.
 678          In the latter case, use subreg on the rhs side, not lhs.  */
 679       rtx sub;
 680
 681       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 682         {
 683           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 684           if (sub)
 685             {
 686               emit_move_insn (op0, sub);
 687               return true;
 688             }
 689         }
 690       else
 691         {
 692           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 693                                      bitnum / BITS_PER_UNIT);
 694           if (sub)
 695             {
 696               emit_move_insn (sub, value);
 697               return true;
 698             }
 699         }
 700     }
 701
 702   /* If the target is memory, storing any naturally aligned field can be
 703      done with a simple store.  For targets that support fast unaligned
 704      memory, any naturally sized, unit aligned field can be done directly.  */
 705   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
 706     {
 707       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 708       emit_move_insn (op0, value);
 709       return true;
 710     }
 711
 712   /* Make sure we are playing with integral modes.  Pun with subregs
 713      if we aren't.  This must come after the entire register case above,
 714      since that case is valid for any mode.  The following cases are only
 715      valid for integral modes.  */
 716   {
 717     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 718     if (imode != GET_MODE (op0))
 719       {
 720         if (MEM_P (op0))
 721           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
 722         else
 723           {
 724             gcc_assert (imode != BLKmode);
 725             op0 = gen_lowpart (imode, op0);
 726           }
 727       }
 728   }
 729
 730   /* Storing an lsb-aligned field in a register
 731      can be done with a movstrict instruction.  */
 732
 733   if (!MEM_P (op0)
 734       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 735       && bitsize == GET_MODE_BITSIZE (fieldmode)
 736       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 737     {
 738       struct expand_operand ops[2];
 739       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 740       rtx arg0 = op0;
 741       unsigned HOST_WIDE_INT subreg_off;
 742
 743       if (GET_CODE (arg0) == SUBREG)
 744         {
 745           /* Else we've got some float mode source being extracted into
 746              a different float mode destination -- this combination of
 747              subregs results in Severe Tire Damage.  */
 748           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 749                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 750                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 751           arg0 = SUBREG_REG (arg0);
 752         }
 753
 754       subreg_off = bitnum / BITS_PER_UNIT;
 755       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 756         {
 757           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 758
 759           create_fixed_operand (&ops[0], arg0);
 760           /* Shrink the source operand to FIELDMODE.  */
 761           create_convert_operand_to (&ops[1], value, fieldmode, false);
 762           if (maybe_expand_insn (icode, 2, ops))
 763             return true;
 764         }
 765     }
 766
 767   /* Handle fields bigger than a word.  */
 768
 769   if (bitsize > BITS_PER_WORD)
 770     {
 771       /* Here we transfer the words of the field
 772          in the order least significant first.
 773          This is because the most significant word is the one which may
 774          be less than full.
 775          However, only do that if the value is not BLKmode.  */
 776
 777       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 778       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 779       unsigned int i;
 780       rtx_insn *last;
 781
 782       /* This is the mode we must force value to, so that there will be enough
 783          subwords to extract.  Note that fieldmode will often (always?) be
 784          VOIDmode, because that is what store_field uses to indicate that this
 785          is a bit field, but passing VOIDmode to operand_subword_force
 786          is not allowed.  */
 787       fieldmode = GET_MODE (value);
 788       if (fieldmode == VOIDmode)
 789         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 790
 791       last = get_last_insn ();
 792       for (i = 0; i < nwords; i++)
 793         {
 794           /* If I is 0, use the low-order word in both field and target;
 795              if I is 1, use the next to lowest word; and so on.  */
 796           unsigned int wordnum = (backwards
 797                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 798                                   - i - 1
 799                                   : i);
 800           unsigned int bit_offset = (backwards
 801                                      ? MAX ((int) bitsize - ((int) i + 1)
 802                                             * BITS_PER_WORD,
 803                                             0)
 804                                      : (int) i * BITS_PER_WORD);
 805           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 806           unsigned HOST_WIDE_INT new_bitsize =
 807             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 808
 809           /* If the remaining chunk doesn't have full wordsize we have
 810              to make sure that for big endian machines the higher order
 811              bits are used.  */
 812           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 813             value_word = simplify_expand_binop (word_mode, lshr_optab,
 814                                                 value_word,
 815                                                 GEN_INT (BITS_PER_WORD
 816                                                          - new_bitsize),
 817                                                 NULL_RTX, true,
 818                                                 OPTAB_LIB_WIDEN);
 819
 820           if (!store_bit_field_1 (op0, new_bitsize,
 821                                   bitnum + bit_offset,
 822                                   bitregion_start, bitregion_end,
 823                                   word_mode,
 824                                   value_word, fallback_p))
 825             {
 826               delete_insns_since (last);
 827               return false;
 828             }
 829         }
 830       return true;
 831     }
 832
 833   /* If VALUE has a floating-point or complex mode, access it as an
 834      integer of the corresponding size.  This can occur on a machine
 835      with 64 bit registers that uses SFmode for float.  It can also
 836      occur for unaligned float or complex fields.  */
 837   orig_value = value;
 838   if (GET_MODE (value) != VOIDmode
 839       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 840       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 841     {
 842       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 843       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 844     }
 845
 846   /* If OP0 is a multi-word register, narrow it to the affected word.
 847      If the region spans two words, defer to store_split_bit_field.  */
 848   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 849     {
 850       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 851                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 852       gcc_assert (op0);
 853       bitnum %= BITS_PER_WORD;
 854       if (bitnum + bitsize > BITS_PER_WORD)
 855         {
 856           if (!fallback_p)
 857             return false;
 858
 859           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 860                                  bitregion_end, value);
 861           return true;
 862         }
 863     }
 864
 865   /* From here on we can assume that the field to be stored in fits
 866      within a word.  If the destination is a register, it too fits
 867      in a word.  */
 868
 869   extraction_insn insv;
 870   if (!MEM_P (op0)
 871       && get_best_reg_extraction_insn (&insv, EP_insv,
 872                                        GET_MODE_BITSIZE (GET_MODE (op0)),
 873                                        fieldmode)
 874       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 875     return true;
 876
 877   /* If OP0 is a memory, try copying it to a register and seeing if a
 878      cheap register alternative is available.  */
 879   if (MEM_P (op0))
 880     {
 881       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
 882                                         fieldmode)
 883           && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 884         return true;
 885
 886       rtx_insn *last = get_last_insn ();
 887
 888       /* Try loading part of OP0 into a register, inserting the bitfield
 889          into that, and then copying the result back to OP0.  */
 890       unsigned HOST_WIDE_INT bitpos;
 891       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
 892                                                bitregion_start, bitregion_end,
 893                                                fieldmode, &bitpos);
 894       if (xop0)
 895         {
 896           rtx tempreg = copy_to_reg (xop0);
 897           if (store_bit_field_1 (tempreg, bitsize, bitpos,
 898                                  bitregion_start, bitregion_end,
 899                                  fieldmode, orig_value, false))
 900             {
 901               emit_move_insn (xop0, tempreg);
 902               return true;
 903             }
 904           delete_insns_since (last);
 905         }
 906     }
 907
 908   if (!fallback_p)
 909     return false;
 910
 911   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
 912                          bitregion_end, value);
 913   return true;
 914 }
 915
 916 /* Generate code to store value from rtx VALUE
 917    into a bit-field within structure STR_RTX
 918    containing BITSIZE bits starting at bit BITNUM.
 919
 920    BITREGION_START is bitpos of the first bitfield in this region.
 921    BITREGION_END is the bitpos of the ending bitfield in this region.
 922    These two fields are 0, if the C++ memory model does not apply,
 923    or we are not interested in keeping track of bitfield regions.
 924
 925    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.

        Nothing is returned.  The store is attempted in this order:
        1. If strict_volatile_bitfield_p accepts the access, it is done
           either as a plain move (when simple_mem_bitfield_p holds) or
           through store_fixed_bit_field_1 on a narrowed MEM, and the
           function returns.
        2. Otherwise, for a MEM with a nonzero BITREGION_START, the address
           and all bit positions are rebased to the start of the region.
        3. store_bit_field_1 is then called with fallback enabled; a false
           return from it is treated as impossible (gcc_unreachable).  */
 926
 927 void
 928 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 929                  unsigned HOST_WIDE_INT bitnum,
 930                  unsigned HOST_WIDE_INT bitregion_start,
 931                  unsigned HOST_WIDE_INT bitregion_end,
 932                  machine_mode fieldmode,
 933                  rtx value)
 934 {
 935   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
 936   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
 937                                   bitregion_start, bitregion_end))
 938     {
 939       /* Storing any naturally aligned field can be done with a simple
 940          store.  For targets that support fast unaligned memory, any
 941          naturally sized, unit aligned field can be done directly.  */
 942       if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, fieldmode))
 943         {
 944           str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
 945                                              bitnum / BITS_PER_UNIT);
 946           emit_move_insn (str_rtx, value);
 947         }
 948       else
 949         {
               /* BITNUM is passed both by value and as the output slot, so
                  it is overwritten with the position narrow_bit_field_mem
                  reports for the narrowed reference.  */
 950           str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
 951                                           &bitnum);
 952           /* Explicitly override the C/C++ memory model; ignore the
 953              bit range so that we can do the access in the mode mandated
 954              by -fstrict-volatile-bitfields instead.  */
 955           store_fixed_bit_field_1 (str_rtx, bitsize, bitnum, value);
 956         }
 957
 958       return;
 959     }
 960
 961   /* Under the C++11 memory model, we must not touch bits outside the
 962      bit region.  Adjust the address to start at the beginning of the
 963      bit region.  */
 964   if (MEM_P (str_rtx) && bitregion_start > 0)
 965     {
 966       machine_mode bestmode;
 967       HOST_WIDE_INT offset, size;
 968
 969       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
 970
           /* OFFSET is the region start expressed in units of BITS_PER_UNIT
              bits.  BITNUM and BITREGION_END become relative to that start
              and BITREGION_START becomes 0.  SIZE is the number of units
              needed to cover bits 0 .. BITNUM + BITSIZE - 1 of the rebased
              reference (rounded up).  */
 971       offset = bitregion_start / BITS_PER_UNIT;
 972       bitnum -= bitregion_start;
 973       size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
 974       bitregion_end -= bitregion_start;
 975       bitregion_start = 0;
 976       bestmode = get_best_mode (bitsize, bitnum,
 977                                 bitregion_start, bitregion_end,
 978                                 MEM_ALIGN (str_rtx), VOIDmode,
 979                                 MEM_VOLATILE_P (str_rtx));
 980       str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
 981     }
 982
       /* FALLBACK_P is true here, so store_bit_field_1 is expected to
          always report success.  */
 983   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
 984                           bitregion_start, bitregion_end,
 985                           fieldmode, value, true))
 986     gcc_unreachable ();
 987 }
 988 \f
 989 /* Use shifts and boolean operations to store VALUE into a bit field of
 990    width BITSIZE in OP0, starting at bit BITNUM.

        BITREGION_START and BITREGION_END are only forwarded: to
        get_best_mode when picking the access mode for a MEM, and to
        store_split_bit_field when no single mode is found.

        A non-MEM OP0 is handed to store_fixed_bit_field_1 unchanged.  */
 991
 992 static void
 993 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 994                        unsigned HOST_WIDE_INT bitnum,
 995                        unsigned HOST_WIDE_INT bitregion_start,
 996                        unsigned HOST_WIDE_INT bitregion_end,
 997                        rtx value)
 998 {
 999   /* There is a case not handled here:
1000      a structure with a known alignment of just a halfword
1001      and a field split across two aligned halfwords within the structure.
1002      Or likewise a structure with a known alignment of just a byte
1003      and a field split across two bytes.
1004      Such cases are not supposed to be able to occur.  */
1005
1006   if (MEM_P (op0))
1007     {
           /* Start from OP0's own mode, but substitute word_mode when that
              mode reports a bit size of 0 or is wider than word_mode; the
              result is the mode argument given to get_best_mode.  */
1008       machine_mode mode = GET_MODE (op0);
1009       if (GET_MODE_BITSIZE (mode) == 0
1010           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1011         mode = word_mode;
1012       mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1013                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1014
1015       if (mode == VOIDmode)
1016         {
1017           /* The only way this should occur is if the field spans word
1018              boundaries.  */
1019           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1020                                  bitregion_end, value);
1021           return;
1022         }
1023
           /* Replace OP0 by the narrowed MEM in MODE; BITNUM is overwritten
              through the output pointer (presumably with the position
              relative to the narrowed reference -- confirm against
              narrow_bit_field_mem).  */
1024       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1025     }
1026
1027   store_fixed_bit_field_1 (op0, bitsize, bitnum, value);
1028 }
1029
1030 /* Helper function for store_fixed_bit_field, stores
1031    the bit field always using the MODE of OP0.

        OP0 must have a scalar integer mode (asserted below).  BITSIZE is the
        field width and BITNUM its starting bit within OP0; on
        BYTES_BIG_ENDIAN targets BITNUM is first converted to a distance
        from the lsb.  VALUE is the value to store; only a CONST_INT VALUE
        gets the all-zero / all-one shortcuts.

        The store is a read-modify-write: OP0 is copied to a register, the
        field bits are cleared with AND, VALUE (shifted into place) is merged
        in with IOR, and the result is moved back into OP0.  */
1032
1033 static void
1034 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1035                          unsigned HOST_WIDE_INT bitnum,
1036                          rtx value)
1037 {
1038   machine_mode mode;
1039   rtx temp;
       /* Set only for a CONST_INT VALUE: ALL_ZERO lets the IOR be skipped,
          ALL_ONE lets the clearing AND be skipped.  */
1040   int all_zero = 0;
1041   int all_one = 0;
1042
1043   mode = GET_MODE (op0);
1044   gcc_assert (SCALAR_INT_MODE_P (mode));
1045
1046   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1047      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1048
1049   if (BYTES_BIG_ENDIAN)
1050     /* BITNUM is the distance between our msb
1051        and that of the containing datum.
1052        Convert it to the distance from the lsb.  */
1053     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1054
1055   /* Now BITNUM is always the distance between our lsb
1056      and that of OP0.  */
1057
1058   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1059      we must first convert its mode to MODE.  */
1060
1061   if (CONST_INT_P (value))
1062     {
1063       unsigned HOST_WIDE_INT v = UINTVAL (value);
1064
           /* Keep only the low BITSIZE bits of the constant.  The guard
              keeps the shift count below the width of HOST_WIDE_INT.  */
1065       if (bitsize < HOST_BITS_PER_WIDE_INT)
1066         v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;
1067
1068       if (v == 0)
1069         all_zero = 1;
1070       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1071                 && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
1072                || (bitsize == HOST_BITS_PER_WIDE_INT
1073                    && v == (unsigned HOST_WIDE_INT) -1))
1074         all_one = 1;
1075
1076       value = lshift_value (mode, v, bitnum);
1077     }
1078   else
1079     {
           /* An explicit mask is needed only when VALUE's mode is not
              exactly BITSIZE bits wide and the field does not end at the
              top bit of MODE.  (In the latter case the left shift below
              presumably pushes the excess high bits out of MODE.)  The
              test is made before VALUE is converted to MODE.  */
1080       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1081                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1082
1083       if (GET_MODE (value) != mode)
1084         value = convert_to_mode (mode, value, 1);
1085
1086       if (must_and)
1087         value = expand_binop (mode, and_optab, value,
1088                               mask_rtx (mode, 0, bitsize, 0),
1089                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1090       if (bitnum > 0)
1091         value = expand_shift (LSHIFT_EXPR, mode, value,
1092                               bitnum, NULL_RTX, 1);
1093     }
1094
1095   /* Now clear the chosen bits in OP0,
1096      except that if VALUE sets every bit of the field (ALL_ONE)
1097      we need not bother.  */
1098   /* We keep the intermediates in registers to allow CSE to combine
1099      consecutive bitfield assignments.  */
1100
1101   temp = force_reg (mode, op0);
1102
1103   if (! all_one)
1104     {
1105       temp = expand_binop (mode, and_optab, temp,
1106                            mask_rtx (mode, bitnum, bitsize, 1),
1107                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1108       temp = force_reg (mode, temp);
1109     }
1110
1111   /* Now logical-or VALUE into OP0, unless it is zero.  */
1112
1113   if (! all_zero)
1114     {
1115       temp = expand_binop (mode, ior_optab, temp, value,
1116                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1117       temp = force_reg (mode, temp);
1118     }
1119
       /* Write the merged value back; the move is emitted on a copy_rtx of
          OP0 rather than on the rtx that was passed in.  */
1120   if (op0 != temp)
1121     {
1122       op0 = copy_rtx (op0);
1123       emit_move_insn (op0, temp);
1124     }
1125 }
1125 \f
1126 /* Store a bit field that is split across multiple accessible memory objects.
1127
1128    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1129    BITSIZE is the field width; BITPOS the position of its first bit
1130    (within the word).
1131    VALUE is the value to store.
        BITREGION_START and BITREGION_END are forwarded to
        store_fixed_bit_field; a nonzero BITREGION_END additionally makes
        the loop below shrink UNIT near the end of the region when OP0 is
        neither a REG nor a SUBREG of a REG.

        The field is stored piecewise: each loop iteration extracts the next
        THISSIZE bits of VALUE and stores them with store_fixed_bit_field,
        until BITSIZE bits have been done.
1132
1133    This does not yet handle fields wider than BITS_PER_WORD.  */
1134
1135 static void
1136 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1137                        unsigned HOST_WIDE_INT bitpos,
1138                        unsigned HOST_WIDE_INT bitregion_start,
1139                        unsigned HOST_WIDE_INT bitregion_end,
1140                        rtx value)
1141 {
       /* UNIT is the size in bits of the chunks OP0 is addressed in;
          BITSDONE counts the bits of the field stored so far.  */
1142   unsigned int unit;
1143   unsigned int bitsdone = 0;
1144
1145   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1146      much at a time.  */
1147   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1148     unit = BITS_PER_WORD;
1149   else
1150     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1151
1152   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1153      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1154      again, and we will mutually recurse forever.  */
1155   if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1156     unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1157
1158   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1159      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1160      that VALUE might be a floating-point constant.  */
1161   if (CONSTANT_P (value) && !CONST_INT_P (value))
1162     {
1163       rtx word = gen_lowpart_common (word_mode, value);
1164
1165       if (word && (value != word))
1166         value = word;
1167       else
1168         value = gen_lowpart_common (word_mode,
1169                                     force_reg (GET_MODE (value) != VOIDmode
1170                                                ? GET_MODE (value)
1171                                                : word_mode, value));
1172     }
1173
1174   while (bitsdone < bitsize)
1175     {
1176       unsigned HOST_WIDE_INT thissize;
1177       rtx part, word;
1178       unsigned HOST_WIDE_INT thispos;
1179       unsigned HOST_WIDE_INT offset;
1180
           /* OFFSET is the index of the current UNIT-sized chunk and
              THISPOS the bit position inside that chunk.  */
1181       offset = (bitpos + bitsdone) / unit;
1182       thispos = (bitpos + bitsdone) % unit;
1183
1184       /* When region of bytes we can touch is restricted, decrease
1185          UNIT close to the end of the region as needed.  If op0 is a REG
1186          or SUBREG of REG, don't do this, as there can't be data races
1187          on a register and we can expand shorter code in some cases.  */
1188       if (bitregion_end
1189           && unit > BITS_PER_UNIT
1190           && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1191           && !REG_P (op0)
1192           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1193         {
               /* Halve UNIT and redo this iteration; BITSDONE is unchanged,
                  so OFFSET and THISPOS are recomputed for the smaller
                  unit.  The UNIT > BITS_PER_UNIT test above bounds the
                  number of halvings.  */
1194           unit = unit / 2;
1195           continue;
1196         }
1197
1198       /* THISSIZE must not overrun a word boundary.  Otherwise,
1199          store_fixed_bit_field will call us again, and we will mutually
1200          recurse forever.  */
1201       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1202       thissize = MIN (thissize, unit - thispos);
1203
           /* PART becomes the next THISSIZE bits of VALUE: computed directly
              by shift-and-mask for a CONST_INT, otherwise obtained from
              extract_fixed_bit_field.  */
1204       if (BYTES_BIG_ENDIAN)
1205         {
1206           /* Fetch successively less significant portions.  */
1207           if (CONST_INT_P (value))
1208             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1209                              >> (bitsize - bitsdone - thissize))
1210                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1211           else
1212             {
1213               int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1214               /* The args are chosen so that the last part includes the
1215                  lsb.  Give extract_bit_field the value it needs (with
1216                  endianness compensation) to fetch the piece we want.  */
1217               part = extract_fixed_bit_field (word_mode, value, thissize,
1218                                               total_bits - bitsize + bitsdone,
1219                                               NULL_RTX, 1);
1220             }
1221         }
1222       else
1223         {
1224           /* Fetch successively more significant portions.  */
1225           if (CONST_INT_P (value))
1226             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1227                              >> bitsdone)
1228                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1229           else
1230             part = extract_fixed_bit_field (word_mode, value, thissize,
1231                                             bitsdone, NULL_RTX, 1);
1232         }
1233
1234       /* If OP0 is a register, then handle OFFSET here.
1235
1236          When handling multiword bitfields, store_bit_field_1 may pass
1237          down a word_mode SUBREG of a larger REG for a bitfield that actually
1238          crosses a word boundary.  Thus, for a SUBREG, we must find
1239          the current word starting from the base register.  */
1240       if (GET_CODE (op0) == SUBREG)
1241         {
               /* Word index within SUBREG_REG: the word the SUBREG starts
                  at plus the whole words covered by OFFSET units.  */
1242           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1243                             + (offset * unit / BITS_PER_WORD);
1244           machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
               /* An inner register narrower than a word has only word 0;
                  any other word index is marked with const0_rtx and
                  skipped below.  */
1245           if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1246             word = word_offset ? const0_rtx : op0;
1247           else
1248             word = operand_subword_force (SUBREG_REG (op0), word_offset,
1249                                           GET_MODE (SUBREG_REG (op0)));
               /* The whole-word part of OFFSET was consumed selecting WORD;
                  keep only the remainder, i.e. OFFSET masked with
                  (BITS_PER_WORD / UNIT) - 1.  */
1250           offset &= BITS_PER_WORD / unit - 1;
1251         }
1252       else if (REG_P (op0))
1253         {
1254           machine_mode op0_mode = GET_MODE (op0);
1255           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1256             word = offset ? const0_rtx : op0;
1257           else
1258             word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1259                                           GET_MODE (op0));
               /* For a REG, UNIT is still BITS_PER_WORD (it is never halved
                  above), so the mask is 0 and OFFSET becomes 0.  */
1260           offset &= BITS_PER_WORD / unit - 1;
1261         }
1262       else
1263         word = op0;
1264
1265       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1266          it is just an out-of-bounds access.  Ignore it.  */
1267       if (word != const0_rtx)
1268         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1269                                bitregion_start, bitregion_end, part);
1270       bitsdone += thissize;
1271     }
1272 }
1273 \f
1274 /* A subroutine of extract_bit_field_1 that converts return value X
1275    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1276    to extract_bit_field.

        X is returned unchanged if it is already in MODE or TMODE.  In every
        other case the result is produced by converting to TMODE; UNSIGNEDP
        is passed through to convert_to_mode.  */
1277
1278 static rtx
1279 convert_extracted_bit_field (rtx x, machine_mode mode,
1280                              machine_mode tmode, bool unsignedp)
1281 {
1282   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1283     return x;
1284
1285   /* If TMODE is not a scalar integer mode, first convert X to the
1286      integer mode of TMODE's size and then access it as a floating-point
1287      value via a SUBREG.  */
1288   if (!SCALAR_INT_MODE_P (tmode))
1289     {
1290       machine_mode smode;
1291
1292       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1293       x = convert_to_mode (smode, x, unsignedp);
1294       x = force_reg (smode, x);
1295       return gen_lowpart (tmode, x);
1296     }
1297
1298   return convert_to_mode (tmode, x, unsignedp);
1299 }
1300
1301 /* Try to use an ext(z)v pattern to extract a field from OP0.
1302    Return the extracted value on success, otherwise return null.
1303    EXT_MODE is the mode of the extraction and the other arguments
1304    are as for extract_bit_field.

        EXTV describes the pattern to use: EXT_MODE is taken from
        EXTV->field_mode, a MEM operand is narrowed to EXTV->struct_mode,
        and EXTV->icode is the instruction that gets expanded.

        Null is returned when BITSIZE is zero or exceeds the width of
        EXT_MODE, when OP0 is a SUBREG whose mode is not EXT_MODE, or when
        maybe_expand_insn fails.  */
1305
1306 static rtx
1307 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1308                               unsigned HOST_WIDE_INT bitsize,
1309                               unsigned HOST_WIDE_INT bitnum,
1310                               int unsignedp, rtx target,
1311                               machine_mode mode, machine_mode tmode)
1312 {
1313   struct expand_operand ops[4];
       /* SPEC_TARGET remembers the target the caller asked for (or the
          fresh TMODE register created below).  SPEC_TARGET_SUBREG is set
          only when the output operand is a wider EXT_MODE lowpart of that
          target.  */
1314   rtx spec_target = target;
1315   rtx spec_target_subreg = 0;
1316   machine_mode ext_mode = extv->field_mode;
1317   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1318
1319   if (bitsize == 0 || unit < bitsize)
1320     return NULL_RTX;
1321
1322   if (MEM_P (op0))
1323     /* Get a reference to the first byte of the field.  */
1324     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1325                                 &bitnum);
1326   else
1327     {
1328       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1329       if (BYTES_BIG_ENDIAN)
1330         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1331
1332       /* If op0 is a register, we need it in EXT_MODE to make it
1333          acceptable to the format of ext(z)v.  */
1334       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1335         return NULL_RTX;
1336       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1337         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1338     }
1339
1340   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1341      "backwards" from the size of the unit we are extracting from.
1342      Otherwise, we count bits from the most significant on a
1343      BYTES/BITS_BIG_ENDIAN machine.  */
1344
1345   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1346     bitnum = unit - bitsize - bitnum;
1347
1348   if (target == 0)
1349     target = spec_target = gen_reg_rtx (tmode);
1350
1351   if (GET_MODE (target) != ext_mode)
1352     {
1353       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1354          between the mode of the extraction (EXT_MODE) and the target
1355          mode.  Instead, create a temporary in EXT_MODE; the result is
1356          then converted by convert_extracted_bit_field below.  */
1357       if (REG_P (target)
1358           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1359         {
1360           target = gen_lowpart (ext_mode, target);
1361           if (GET_MODE_PRECISION (ext_mode)
1362               > GET_MODE_PRECISION (GET_MODE (spec_target)))
1363             spec_target_subreg = target;
1364         }
1365       else
1366         target = gen_reg_rtx (ext_mode);
1367     }
1368
       /* Operands: 0 = output in EXT_MODE, 1 = OP0, 2 = BITSIZE,
          3 = BITNUM.  */
1369   create_output_operand (&ops[0], target, ext_mode);
1370   create_fixed_operand (&ops[1], op0);
1371   create_integer_operand (&ops[2], bitsize);
1372   create_integer_operand (&ops[3], bitnum);
1373   if (maybe_expand_insn (extv->icode, 4, ops))
1374     {
           /* Use the output operand as it stands after expansion.  If it
              is the requested target, return it; if it is the wider lowpart
              of the requested target, return the requested target itself;
              otherwise convert it.  */
1375       target = ops[0].value;
1376       if (target == spec_target)
1377         return target;
1378       if (target == spec_target_subreg)
1379         return spec_target;
1380       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1381     }
1382   return NULL_RTX;
1383 }
1384
1385 /* A subroutine of extract_bit_field, with the same arguments.
1386    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1387    if we can find no other means of implementing the operation.
1388    If FALLBACK_P is false, return NULL instead.  */
1389
1390 static rtx
1391 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1392                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1393                      machine_mode mode, machine_mode tmode,
1394                      bool fallback_p)
1395 {
1396   rtx op0 = str_rtx;
1397   machine_mode int_mode;
1398   machine_mode mode1;
1399
1400   if (tmode == VOIDmode)
1401     tmode = mode;
1402
1403   while (GET_CODE (op0) == SUBREG)
1404     {
1405       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1406       op0 = SUBREG_REG (op0);
1407     }
1408
1409   /* If we have an out-of-bounds access to a register, just return an
1410      uninitialized register of the required mode.  This can occur if the
1411      source code contains an out-of-bounds access to a small array.  */
1412   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1413     return gen_reg_rtx (tmode);
1414
1415   if (REG_P (op0)
1416       && mode == GET_MODE (op0)
1417       && bitnum == 0
1418       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1419     {
1420       /* We're trying to extract a full register from itself.  */
1421       return op0;
1422     }
1423
1424   /* See if we can get a better vector mode before extracting.  */
1425   if (VECTOR_MODE_P (GET_MODE (op0))
1426       && !MEM_P (op0)
1427       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1428     {
1429       machine_mode new_mode;
1430
1431       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1432         new_mode = MIN_MODE_VECTOR_FLOAT;
1433       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1434         new_mode = MIN_MODE_VECTOR_FRACT;
1435       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1436         new_mode = MIN_MODE_VECTOR_UFRACT;
1437       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1438         new_mode = MIN_MODE_VECTOR_ACCUM;
1439       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1440         new_mode = MIN_MODE_VECTOR_UACCUM;
1441       else
1442         new_mode = MIN_MODE_VECTOR_INT;
1443
1444       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1445         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1446             && targetm.vector_mode_supported_p (new_mode))
1447           break;
1448       if (new_mode != VOIDmode)
1449         op0 = gen_lowpart (new_mode, op0);
1450     }
1451
1452   /* Use vec_extract patterns for extracting parts of vectors whenever
1453      available.  */
1454   if (VECTOR_MODE_P (GET_MODE (op0))
1455       && !MEM_P (op0)
1456       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1457       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1458           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1459     {
1460       struct expand_operand ops[3];
1461       machine_mode outermode = GET_MODE (op0);
1462       machine_mode innermode = GET_MODE_INNER (outermode);
1463       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1464       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1465
1466       create_output_operand (&ops[0], target, innermode);
1467       create_input_operand (&ops[1], op0, outermode);
1468       create_integer_operand (&ops[2], pos);
1469       if (maybe_expand_insn (icode, 3, ops))
1470         {
1471           target = ops[0].value;
1472           if (GET_MODE (target) != mode)
1473             return gen_lowpart (tmode, target);
1474           return target;
1475         }
1476     }
1477
1478   /* Make sure we are playing with integral modes.  Pun with subregs
1479      if we aren't.  */
1480   {
1481     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1482     if (imode != GET_MODE (op0))
1483       {
1484         if (MEM_P (op0))
1485           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1486         else if (imode != BLKmode)
1487           {
1488             op0 = gen_lowpart (imode, op0);
1489
1490             /* If we got a SUBREG, force it into a register since we
1491                aren't going to be able to do another SUBREG on it.  */
1492             if (GET_CODE (op0) == SUBREG)
1493               op0 = force_reg (imode, op0);
1494           }
1495         else if (REG_P (op0))
1496           {
1497             rtx reg, subreg;
1498             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1499                                             MODE_INT);
1500             reg = gen_reg_rtx (imode);
1501             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1502             emit_move_insn (subreg, op0);
1503             op0 = reg;
1504             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1505           }
1506         else
1507           {
1508             HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1509             rtx mem = assign_stack_temp (GET_MODE (op0), size);
1510             emit_move_insn (mem, op0);
1511             op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1512           }
1513       }
1514   }
1515
1516   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1517      If that's wrong, the solution is to test for it and set TARGET to 0
1518      if needed.  */
1519
1520   /* Get the mode of the field to use for atomic access or subreg
1521      conversion.  */
1522   mode1 = mode;
1523   if (SCALAR_INT_MODE_P (tmode))
1524     {
1525       machine_mode try_mode = mode_for_size (bitsize,
1526                                                   GET_MODE_CLASS (tmode), 0);
1527       if (try_mode != BLKmode)
1528         mode1 = try_mode;
1529     }
1530   gcc_assert (mode1 != BLKmode);
1531
1532   /* Extraction of a full MODE1 value can be done with a subreg as long
1533      as the least significant bit of the value is the least significant
1534      bit of either OP0 or a word of OP0.  */
1535   if (!MEM_P (op0)
1536       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1537       && bitsize == GET_MODE_BITSIZE (mode1)
1538       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1539     {
1540       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1541                                      bitnum / BITS_PER_UNIT);
1542       if (sub)
1543         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1544     }
1545
1546   /* Extraction of a full MODE1 value can be done with a load as long as
1547      the field is on a byte boundary and is sufficiently aligned.  */
1548   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1549     {
1550       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1551       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1552     }
1553
1554   /* Handle fields bigger than a word.  */
1555
1556   if (bitsize > BITS_PER_WORD)
1557     {
1558       /* Here we transfer the words of the field
1559          in the order least significant first.
1560          This is because the most significant word is the one which may
1561          be less than full.  */
1562
1563       unsigned int backwards = WORDS_BIG_ENDIAN;
1564       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1565       unsigned int i;
1566       rtx_insn *last;
1567
1568       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1569         target = gen_reg_rtx (mode);
1570
1571       /* Indicate for flow that the entire target reg is being set.  */
1572       emit_clobber (target);
1573
1574       last = get_last_insn ();
1575       for (i = 0; i < nwords; i++)
1576         {
1577           /* If I is 0, use the low-order word in both field and target;
1578              if I is 1, use the next to lowest word; and so on.  */
1579           /* Word number in TARGET to use.  */
1580           unsigned int wordnum
1581             = (backwards
1582                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1583                : i);
1584           /* Offset from start of field in OP0.  */
1585           unsigned int bit_offset = (backwards
1586                                      ? MAX ((int) bitsize - ((int) i + 1)
1587                                             * BITS_PER_WORD,
1588                                             0)
1589                                      : (int) i * BITS_PER_WORD);
1590           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1591           rtx result_part
1592             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1593                                              bitsize - i * BITS_PER_WORD),
1594                                    bitnum + bit_offset, 1, target_part,
1595                                    mode, word_mode, fallback_p);
1596
1597           gcc_assert (target_part);
1598           if (!result_part)
1599             {
1600               delete_insns_since (last);
1601               return NULL;
1602             }
1603
1604           if (result_part != target_part)
1605             emit_move_insn (target_part, result_part);
1606         }
1607
1608       if (unsignedp)
1609         {
1610           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1611              need to be zero'd out.  */
1612           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1613             {
1614               unsigned int i, total_words;
1615
1616               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1617               for (i = nwords; i < total_words; i++)
1618                 emit_move_insn
1619                   (operand_subword (target,
1620                                     backwards ? total_words - i - 1 : i,
1621                                     1, VOIDmode),
1622                    const0_rtx);
1623             }
1624           return target;
1625         }
1626
1627       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1628       target = expand_shift (LSHIFT_EXPR, mode, target,
1629                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1630       return expand_shift (RSHIFT_EXPR, mode, target,
1631                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1632     }
1633
1634   /* If OP0 is a multi-word register, narrow it to the affected word.
1635      If the region spans two words, defer to extract_split_bit_field.  */
1636   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1637     {
1638       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1639                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1640       bitnum %= BITS_PER_WORD;
1641       if (bitnum + bitsize > BITS_PER_WORD)
1642         {
1643           if (!fallback_p)
1644             return NULL_RTX;
1645           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1646           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1647         }
1648     }
1649
1650   /* From here on we know the desired field is smaller than a word.
1651      If OP0 is a register, it too fits within a word.  */
1652   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1653   extraction_insn extv;
1654   if (!MEM_P (op0)
1655       /* ??? We could limit the structure size to the part of OP0 that
1656          contains the field, with appropriate checks for endianness
1657          and TRULY_NOOP_TRUNCATION.  */
1658       && get_best_reg_extraction_insn (&extv, pattern,
1659                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1660                                        tmode))
1661     {
1662       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1663                                                  unsignedp, target, mode,
1664                                                  tmode);
1665       if (result)
1666         return result;
1667     }
1668
1669   /* If OP0 is a memory, try copying it to a register and seeing if a
1670      cheap register alternative is available.  */
1671   if (MEM_P (op0))
1672     {
1673       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1674                                         tmode))
1675         {
1676           rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1677                                                      bitnum, unsignedp,
1678                                                      target, mode,
1679                                                      tmode);
1680           if (result)
1681             return result;
1682         }
1683
1684       rtx_insn *last = get_last_insn ();
1685
1686       /* Try loading part of OP0 into a register and extracting the
1687          bitfield from that.  */
1688       unsigned HOST_WIDE_INT bitpos;
1689       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1690                                                0, 0, tmode, &bitpos);
1691       if (xop0)
1692         {
1693           xop0 = copy_to_reg (xop0);
1694           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1695                                             unsignedp, target,
1696                                             mode, tmode, false);
1697           if (result)
1698             return result;
1699           delete_insns_since (last);
1700         }
1701     }
1702
1703   if (!fallback_p)
1704     return NULL;
1705
1706   /* Find a correspondingly-sized integer field, so we can apply
1707      shifts and masks to it.  */
1708   int_mode = int_mode_for_mode (tmode);
1709   if (int_mode == BLKmode)
1710     int_mode = int_mode_for_mode (mode);
1711   /* Should probably push op0 out to memory and then do a load.  */
1712   gcc_assert (int_mode != BLKmode);
1713
1714   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1715                                     target, unsignedp);
1716   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1717 }
1718
1719 /* Generate code to extract a bit-field from STR_RTX
1720    containing BITSIZE bits, starting at BITNUM,
1721    and put it in TARGET if possible (if TARGET is nonzero).
1722    Regardless of TARGET, we return the rtx for where the value is placed.
1723
1724    STR_RTX is the structure containing the field (a REG or MEM).
1725    UNSIGNEDP is nonzero if this is an unsigned bit field.
1726    MODE is the natural mode of the field value once extracted.
1727    TMODE is the mode the caller would like the value to have;
1728    but the value may be returned with type MODE instead.
1729
1730    If a TARGET is specified and we can store in it at no extra cost,
1731    we do so, and return TARGET.
1732    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1733    if they are equally easy.  */
1734
1735 rtx
1736 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1737                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1738                    machine_mode mode, machine_mode tmode)
1739 {
1740   machine_mode mode1;  /* Mode passed to strict_volatile_bitfield_p.  */
1741
1742   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1743   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1744     mode1 = GET_MODE (str_rtx);  /* STR_RTX has a mode with a size.  */
1745   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1746     mode1 = GET_MODE (target);  /* Otherwise try the mode of TARGET.  */
1747   else
1748     mode1 = tmode;  /* Otherwise the mode the caller asked for.  */
1749
1750   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1751     {
1752       rtx result;
1753
1754       /* Extraction of a full MODE1 value can be done with a load as long as
1755          the field is on a byte boundary and is sufficiently aligned.  */
1756       if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, mode1))
1757         result = adjust_bitfield_address (str_rtx, mode1,
1758                                           bitnum / BITS_PER_UNIT);
1759       else
1760         {
1761           str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1762                                           &bitnum);  /* BITNUM goes by address.  */
1763           result = extract_fixed_bit_field_1 (mode, str_rtx, bitsize, bitnum,
1764                                               target, unsignedp);
1765         }
1766
1767       return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1768     }
1769   /* Otherwise use extract_bit_field_1, passing true for its last argument.  */
1770   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1771                               target, mode, tmode, true);
1772 }
1773 \f
1774 /* Extract a field of BITSIZE bits starting at bit BITNUM of OP0,
1775    using shifts and boolean operations.
1776
1777    A nonzero UNSIGNEDP means the field is unsigned (no sign extension).
1778    A nonzero TARGET is a suggestion for where to put the value; the
1779    value may or may not end up there.  When TARGET is not used, the
1780    value goes in a pseudo-reg of mode TMODE.  */
1781
1782 static rtx
1783 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1784                          unsigned HOST_WIDE_INT bitsize,
1785                          unsigned HOST_WIDE_INT bitnum, rtx target,
1786                          int unsignedp)
1787 {
1788   /* Anything that is not a MEM is extracted from OP0 as it stands.  */
1789   if (!MEM_P (op0))
1790     return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1791                                       target, unsignedp);
1792
1793   /* Ask get_best_mode for a mode in which to access the field.  */
1794   machine_mode best
1795     = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1796                      MEM_VOLATILE_P (op0));
1797   /* No mode: this should only occur if the field spans word boundaries.  */
1798   if (best == VOIDmode)
1799     return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1800
1801   rtx narrowed = narrow_bit_field_mem (op0, best, bitsize, bitnum, &bitnum);
1802   return extract_fixed_bit_field_1 (tmode, narrowed, bitsize, bitnum,
1803                                     target, unsignedp);
1804 }
1805
1806 /* Helper function for extract_fixed_bit_field, extracts the bit field
1807    always using the MODE of OP0, which must be a scalar integer mode.  */
1808 /* Unsigned values are returned in TMODE, signed ones in the shift mode.  */
1809 static rtx
1810 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1811                            unsigned HOST_WIDE_INT bitsize,
1812                            unsigned HOST_WIDE_INT bitnum, rtx target,
1813                            int unsignedp)
1814 {
1815   machine_mode mode = GET_MODE (op0);
1816   gcc_assert (SCALAR_INT_MODE_P (mode));
1817
1818   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1819      for invalid input, such as extract equivalent of f5 from
1820      gcc.dg/pr48335-2.c.  */
1821
1822   if (BYTES_BIG_ENDIAN)
1823     /* BITNUM is the distance between our msb and that of OP0.
1824        Convert it to the distance from the lsb.  */
1825     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1826
1827   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1828      We have reduced the big-endian case to the little-endian case.  */
1829
1830   if (unsignedp)
1831     {
1832       if (bitnum)
1833         {
1834           /* If the field does not already start at the lsb,
1835              shift it so it does (a logical right shift by BITNUM).  */
1836           /* Maybe propagate the target for the shift.  */
1837           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1838           if (tmode != mode)
1839             subtarget = 0;  /* The shift is done in MODE, not TMODE.  */
1840           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1841         }
1842       /* Convert the value to the desired mode.  */
1843       if (mode != tmode)
1844         op0 = convert_to_mode (tmode, op0, 1);
1845
1846       /* Unless the msb of the field used to be the msb when we shifted,
1847          mask out the upper bits.  */
1848
1849       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1850         return expand_binop (GET_MODE (op0), and_optab, op0,
1851                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1852                              target, 1, OPTAB_LIB_WIDEN);
1853       return op0;
1854     }
1855
1856   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1857      then arithmetic-shift its lsb to the lsb of the word.  */
1858   op0 = force_reg (mode, op0);
1859
1860   /* Find the narrowest integer mode that contains the field.  */
1861   /* NOTE(review): MODE is left as VOIDmode if no mode is wide enough.  */
1862   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1863        mode = GET_MODE_WIDER_MODE (mode))
1864     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1865       {
1866         op0 = convert_to_mode (mode, op0, 0);
1867         break;
1868       }
1869
1870   if (mode != tmode)
1871     target = 0;  /* Do not use TARGET when the shifts are not in TMODE.  */
1872
1873   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1874     {
1875       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1876       /* Maybe propagate the target for the shift.  */
1877       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1878       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1879     }
1880   /* Arithmetic right shift (UNSIGNEDP argument is 0) down to the lsb.  */
1881   return expand_shift (RSHIFT_EXPR, mode, op0,
1882                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1883 }
1884
1885 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx of mode MODE
1886    with the value VALUE << BITPOS.  */
1887 /* NOTE(review): the rtx code is chosen by immed_wide_int_const; confirm.  */
1888 static rtx
1889 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
1890               int bitpos)
1891 {
1892   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
1893 }
1894 \f
1895 /* Extract a bit field that is split across two words
1896    and return a word_mode RTX for the result.
1897
1898    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1899    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1900    UNSIGNEDP is nonzero if we should zero-extend the contents; else sign-extend.  */
1901
1902 static rtx
1903 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1904                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1905 {
1906   unsigned int unit;  /* Chunk size in bits, at most BITS_PER_WORD.  */
1907   unsigned int bitsdone = 0;  /* Bits of the field extracted so far.  */
1908   rtx result = NULL_RTX;
1909   int first = 1;  /* Nonzero until the first part is put in RESULT.  */
1910
1911   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1912      much at a time.  */
1913   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1914     unit = BITS_PER_WORD;
1915   else
1916     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1917
1918   while (bitsdone < bitsize)
1919     {
1920       unsigned HOST_WIDE_INT thissize;  /* Width of this part, in bits.  */
1921       rtx part, word;
1922       unsigned HOST_WIDE_INT thispos;  /* Bit position within the unit.  */
1923       unsigned HOST_WIDE_INT offset;  /* Index of the unit, counted in UNITs.  */
1924
1925       offset = (bitpos + bitsdone) / unit;
1926       thispos = (bitpos + bitsdone) % unit;
1927
1928       /* THISSIZE must not overrun a word boundary.  Otherwise,
1929          extract_fixed_bit_field will call us again, and we will mutually
1930          recurse forever.  */
1931       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1932       thissize = MIN (thissize, unit - thispos);
1933
1934       /* If OP0 is a register, then handle OFFSET here.
1935
1936          When handling multiword bitfields, extract_bit_field may pass
1937          down a word_mode SUBREG of a larger REG for a bitfield that actually
1938          crosses a word boundary.  Thus, for a SUBREG, we must find
1939          the current word starting from the base register.  */
1940       if (GET_CODE (op0) == SUBREG)
1941         {
1942           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1943           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1944                                         GET_MODE (SUBREG_REG (op0)));
1945           offset = 0;
1946         }
1947       else if (REG_P (op0))
1948         {
1949           word = operand_subword_force (op0, offset, GET_MODE (op0));
1950           offset = 0;
1951         }
1952       else
1953         word = op0;  /* A MEM: OFFSET is applied in the bit position below.  */
1954
1955       /* Extract the parts in bit-counting order,
1956          whose meaning is determined by BYTES_BIG_ENDIAN.
1957          OFFSET is in UNITs, and UNIT is in bits.
           The trailing 0, 1 arguments are TARGET and UNSIGNEDP.  */
1958       part = extract_fixed_bit_field (word_mode, word, thissize,
1959                                       offset * unit + thispos, 0, 1);
1960       bitsdone += thissize;
1961
1962       /* Shift this part into place for the result.  */
1963       if (BYTES_BIG_ENDIAN)
1964         {
1965           if (bitsize != bitsdone)
1966             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1967                                  bitsize - bitsdone, 0, 1);
1968         }
1969       else
1970         {
1971           if (bitsdone != thissize)
1972             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1973                                  bitsdone - thissize, 0, 1);
1974         }
1975
1976       if (first)
1977         result = part;
1978       else
1979         /* Combine the parts with bitwise or.  This works
1980            because we extracted each part as an unsigned bit field.  */
1981         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1982                                OPTAB_LIB_WIDEN);
1983
1984       first = 0;
1985     }
1986
1987   /* Unsigned bit field: we are done.  */
1988   if (unsignedp)
1989     return result;
1990   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1991   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1992                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
1993   return expand_shift (RSHIFT_EXPR, word_mode, result,
1994                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
1995 }
1996 \f
1997 /* Read the low-order bits of SRC as an rvalue of mode MODE without
1998    changing the bit pattern.  SRC has mode SRC_MODE; when SRC_MODE is
1999    narrower than MODE the upper bits are filled with zeros.  The
2000    attempt fails when the layout of either mode is unknown (as with
2001    CC modes) or when it would need unprofitable mode punning.  The
2002    value is returned on success and null on failure.
2003
2004    The differences from the gen_lowpart* routines are:
2005
2006      - the result is always to be treated as an rvalue
2007
2008      - a MODE wider than SRC_MODE implies a zero extension
2009        of the source value
2010
2011      - a MODE narrower than SRC_MODE implies a truncation
2012        (which is therefore subject to TRULY_NOOP_TRUNCATION).
2013
2014    Put differently, this routine carries out a computation, while
2015    the gen_lowpart* routines conceptually are lvalue or rvalue
2016    subreg operations.  */
2017
2018 rtx
2019 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2020 {
2021   /* Nothing to do when the two modes already agree.  */
2022   if (src_mode == mode)
2023     return src;
2024
2025   if (CONSTANT_P (src))
2026     {
2027       /* Use simplify_subreg rather than simplify_gen_subreg: when the
2028          simplification fails, the latter would happily build
2029          (subreg (symbol_ref)) or a similarly invalid SUBREG.  */
2030       unsigned int lowpart_byte = subreg_lowpart_offset (mode, src_mode);
2031       rtx folded = simplify_subreg (mode, src, src_mode, lowpart_byte);
2032       if (folded != NULL_RTX)
2033         return folded;
2034
2035       if (GET_MODE (src) == VOIDmode)
2036         return NULL_RTX;
2037       if (!validate_subreg (mode, src_mode, src, lowpart_byte))
2038         return NULL_RTX;
2039
2040       rtx src_reg = force_reg (GET_MODE (src), src);
2041       return gen_rtx_SUBREG (mode, src_reg, lowpart_byte);
2042     }
2043
2044   /* Give up on CC modes.  */
2045   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2046     return NULL_RTX;
2047
2048   /* Equal-sized, tieable modes: try gen_lowpart_common first.  */
2049   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2050       && MODES_TIEABLE_P (mode, src_mode))
2051     {
2052       rtx lowpart = gen_lowpart_common (mode, src);
2053       if (lowpart != NULL_RTX)
2054         return lowpart;
2055     }
2056
2057   /* Otherwise pun through the modes given by int_mode_for_mode.  */
2058   machine_mode src_int_mode = int_mode_for_mode (src_mode);
2059   machine_mode int_mode = int_mode_for_mode (mode);
2060   if (src_int_mode == BLKmode || int_mode == BLKmode)
2061     return NULL_RTX;
2062   if (!MODES_TIEABLE_P (src_int_mode, src_mode)
2063       || !MODES_TIEABLE_P (int_mode, mode))
2064     return NULL_RTX;
2065
2066   rtx as_src_int = gen_lowpart (src_int_mode, src);
2067   rtx as_int = convert_modes (int_mode, src_int_mode, as_src_int, true);
2068   return gen_lowpart (mode, as_int);
2069 }
2070 \f
2071 /* Add INC to TARGET, leaving the sum in TARGET.  */
2072
2073 void
2074 expand_inc (rtx target, rtx inc)
2075 {
2076   machine_mode target_mode = GET_MODE (target);
2077   rtx sum = expand_binop (target_mode, add_optab, target, inc,
2078                           target, 0, OPTAB_LIB_WIDEN);
2079   if (sum != target)
2080     emit_move_insn (target, sum);
2081 }
2082
2083 /* Subtract DEC from TARGET, leaving the difference in TARGET.  */
2084
2085 void
2086 expand_dec (rtx target, rtx dec)
2087 {
2088   machine_mode target_mode = GET_MODE (target);
2089   rtx difference = expand_binop (target_mode, sub_optab, target, dec,
2090                                  target, 0, OPTAB_LIB_WIDEN);
2091   if (difference != target)
2092     emit_move_insn (target, difference);
2093 }
2094 \f
2095 /* Output a shift or rotate instruction for expression code CODE,
2096    with SHIFTED being the rtx for the value to shift (in mode MODE),
2097    and AMOUNT the rtx for the amount to shift by.
2098    Store the result in the rtx TARGET, if that is convenient.
2099    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2100    Return the rtx for where the value is.  */
2101
2102 static rtx
2103 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2104                 rtx amount, rtx target, int unsignedp)
2105 {
2106   rtx op1, temp = 0;
2107   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2108   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2109   optab lshift_optab = ashl_optab;
2110   optab rshift_arith_optab = ashr_optab;
2111   optab rshift_uns_optab = lshr_optab;
2112   optab lrotate_optab = rotl_optab;
2113   optab rrotate_optab = rotr_optab;
2114   machine_mode op1_mode;
2115   machine_mode scalar_mode = mode;  /* Element mode if MODE is a vector.  */
2116   int attempt;
2117   bool speed = optimize_insn_for_speed_p ();
2118
2119   if (VECTOR_MODE_P (mode))
2120     scalar_mode = GET_MODE_INNER (mode);
2121   op1 = amount;
2122   op1_mode = GET_MODE (op1);
2123
2124   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2125      shift amount is a vector, use the vector/vector shift patterns.  */
2126   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2127     {
2128       lshift_optab = vashl_optab;
2129       rshift_arith_optab = vashr_optab;
2130       rshift_uns_optab = vlshr_optab;
2131       lrotate_optab = vrotl_optab;
2132       rrotate_optab = vrotr_optab;
2133     }
2134
2135   /* Previously detected shift-counts computed by NEGATE_EXPR
2136      and shifted in the other direction; but that does not work
2137      on all machines.  */
2138
2139   if (SHIFT_COUNT_TRUNCATED)
2140     {
           /* Reduce an out-of-range constant count modulo the bitsize, or
              strip a lowpart SUBREG between scalar integer modes.  */
2141       if (CONST_INT_P (op1)
2142           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2143               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2144         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2145                        % GET_MODE_BITSIZE (scalar_mode));
2146       else if (GET_CODE (op1) == SUBREG
2147                && subreg_lowpart_p (op1)
2148                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2149                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2150         op1 = SUBREG_REG (op1);
2151     }
2152
2153   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2154      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2155      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2156      amount instead.  */
2157   if (rotate
2158       && CONST_INT_P (op1)
2159       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2160                    GET_MODE_BITSIZE (scalar_mode) - 1))
2161     {
2162       op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2163       left = !left;
2164       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2165     }
2166
2167   if (op1 == const0_rtx)
2168     return shifted;  /* A zero count leaves SHIFTED as it is.  */
2169
2170   /* Check whether it's cheaper to implement a left shift by a constant
2171      bit count by a sequence of additions.  */
2172   if (code == LSHIFT_EXPR
2173       && CONST_INT_P (op1)
2174       && INTVAL (op1) > 0
2175       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2176       && INTVAL (op1) < MAX_BITS_PER_WORD
2177       && (shift_cost (speed, mode, INTVAL (op1))
2178           > INTVAL (op1) * add_cost (speed, mode))
2179       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2180     {
2181       int i;
2182       for (i = 0; i < INTVAL (op1); i++)
2183         {
               /* Each pass adds the value to itself, i.e. doubles it.  */
2184           temp = force_reg (mode, shifted);
2185           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2186                                   unsignedp, OPTAB_LIB_WIDEN);
2187         }
2188       return shifted;
2189     }
2190   /* Try OPTAB_DIRECT, OPTAB_WIDEN, OPTAB_LIB_WIDEN in turn until TEMP is set.  */
2191   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2192     {
2193       enum optab_methods methods;
2194
2195       if (attempt == 0)
2196         methods = OPTAB_DIRECT;
2197       else if (attempt == 1)
2198         methods = OPTAB_WIDEN;
2199       else
2200         methods = OPTAB_LIB_WIDEN;
2201
2202       if (rotate)
2203         {
2204           /* Widening does not work for rotation.  */
2205           if (methods == OPTAB_WIDEN)
2206             continue;
2207           else if (methods == OPTAB_LIB_WIDEN)
2208             {
2209               /* If we have been unable to open-code this by a rotation,
2210                  do it as the IOR of two shifts.  I.e., to rotate A
2211                  by N bits, compute
2212                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2213                  where C is the bitsize of A.
2214
2215                  It is theoretically possible that the target machine might
2216                  not be able to perform either shift and hence we would
2217                  be making two libcalls rather than just the one for the
2218                  shift (similarly if IOR could not be done).  We will allow
2219                  this extremely unlikely lossage to avoid complicating the
2220                  code below.  */
2221
2222               rtx subtarget = target == shifted ? 0 : target;
2223               rtx new_amount, other_amount;
2224               rtx temp1;
2225
2226               new_amount = op1;
                   /* NOTE(review): OP1 == const0_rtx already returned above and
                      OP1 is not changed since, so this test looks redundant.  */
2227               if (op1 == const0_rtx)
2228                 return shifted;
2229               else if (CONST_INT_P (op1))
2230                 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2231                                         - INTVAL (op1));
2232               else
2233                 {
                       /* Variable count: OTHER_AMOUNT = (-OP1) & (precision - 1).  */
2234                   other_amount
2235                     = simplify_gen_unary (NEG, GET_MODE (op1),
2236                                           op1, GET_MODE (op1));
2237                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2238                   other_amount
2239                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2240                                            gen_int_mode (mask, GET_MODE (op1)));
2241                 }
2242
2243               shifted = force_reg (mode, shifted);
2244               /* Two logical shifts in opposite directions, then IOR them.  */
2245               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2246                                      mode, shifted, new_amount, 0, 1);
2247               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2248                                       mode, shifted, other_amount,
2249                                       subtarget, 1);
2250               return expand_binop (mode, ior_optab, temp, temp1, target,
2251                                    unsignedp, methods);
2252             }
2253
2254           temp = expand_binop (mode,
2255                                left ? lrotate_optab : rrotate_optab,
2256                                shifted, op1, target, unsignedp, methods);
2257         }
2258       else if (unsignedp)
2259         temp = expand_binop (mode,
2260                              left ? lshift_optab : rshift_uns_optab,
2261                              shifted, op1, target, unsignedp, methods);
2262
2263       /* Do arithmetic shifts.
2264          Also, if we are going to widen the operand, we can just as well
2265          use an arithmetic right-shift instead of a logical one.  */
2266       if (temp == 0 && ! rotate
2267           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2268         {
2269           enum optab_methods methods1 = methods;
2270
2271           /* If trying to widen a log shift to an arithmetic shift,
2272              don't accept an arithmetic shift of the same size.  */
2273           if (unsignedp)
2274             methods1 = OPTAB_MUST_WIDEN;
2275
2276           /* Arithmetic shift */
2277
2278           temp = expand_binop (mode,
2279                                left ? lshift_optab : rshift_arith_optab,
2280                                shifted, op1, target, unsignedp, methods1);
2281         }
2282
2283       /* We used to try extzv here for logical right shifts, but that was
2284          only useful for one machine, the VAX, and caused poor code
2285          generation there for lshrdi3, so the code was deleted and a
2286          define_expand for lshrsi3 was added to vax.md.  */
2287     }
2288   /* Every attempt failing to produce a value is treated as a bug.  */
2289   gcc_assert (temp);
2290   return temp;
2291 }
2292
2293 /* Emit a shift for expression code CODE.  SHIFTED is the rtx
2294    of the value being shifted and AMOUNT is the shift count,
2295    given as an int.
2296    The result goes in the rtx TARGET when that is convenient.
2297    A nonzero UNSIGNEDP asks for a logical shift, zero for arithmetic.
2298    The return value is the rtx holding the result.  */
2299
2300 rtx
2301 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2302               int amount, rtx target, int unsignedp)
2303 {
2304   rtx amount_rtx = GEN_INT (amount);
2305   return expand_shift_1 (code, mode, shifted, amount_rtx, target, unsignedp);
2306 }
2307
2308 /* Output a shift instruction for expression code CODE,
2309    with SHIFTED being the rtx for the value to shift,
2310    and AMOUNT the tree for the amount to shift by.
2311    Store the result in the rtx TARGET, if that is convenient.
2312    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2313    Return the rtx for where the value is.  */
2314
2315 rtx
2316 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2317                        tree amount, rtx target, int unsignedp)
2318 {
2319   return expand_shift_1 (code, mode,
2320                          shifted, expand_normal (amount), target, unsignedp);
2321 }
2322
2323 \f
2324 /* Indicates the type of fixup needed after a constant multiplication.
2325    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2326    the result should be negated, and ADD_VARIANT means that the
2327    multiplicand should be added to the result.  */
2328 enum mult_variant {basic_variant, negate_variant, add_variant};
2329
2330 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2331                         const struct mult_cost *, machine_mode mode);
2332 static bool choose_mult_variant (machine_mode, HOST_WIDE_INT,
2333                                  struct algorithm *, enum mult_variant *, int);
2334 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2335                               const struct algorithm *, enum mult_variant);
2336 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2337 static rtx extract_high_half (machine_mode, rtx);
2338 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2339 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2340                                        int, int);
2341 /* Compute and return the best algorithm for multiplying by T.
2342    The algorithm must cost less than cost_limit
2343    If retval.cost >= COST_LIMIT, no algorithm was found and all
2344    other field of the returned struct are undefined.
2345    MODE is the machine mode of the multiplication.  */
2346
2347 static void
2348 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2349             const struct mult_cost *cost_limit, machine_mode mode)
2350 {
2351   int m;
2352   struct algorithm *alg_in, *best_alg;
2353   struct mult_cost best_cost;
2354   struct mult_cost new_limit;
2355   int op_cost, op_latency;
2356   unsigned HOST_WIDE_INT orig_t = t;
2357   unsigned HOST_WIDE_INT q;
2358   int maxm, hash_index;
2359   bool cache_hit = false;
2360   enum alg_code cache_alg = alg_zero;
2361   bool speed = optimize_insn_for_speed_p ();
2362   machine_mode imode;
2363   struct alg_hash_entry *entry_ptr;
2364
2365   /* Indicate that no algorithm is yet found.  If no algorithm
2366      is found, this value will be returned and indicate failure.  */
2367   alg_out->cost.cost = cost_limit->cost + 1;
2368   alg_out->cost.latency = cost_limit->latency + 1;
2369
2370   if (cost_limit->cost < 0
2371       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2372     return;
2373
2374   /* Be prepared for vector modes.  */
2375   imode = GET_MODE_INNER (mode);
2376   if (imode == VOIDmode)
2377     imode = mode;
2378
2379   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2380
2381   /* Restrict the bits of "t" to the multiplication's mode.  */
2382   t &= GET_MODE_MASK (imode);
2383
2384   /* t == 1 can be done in zero cost.  */
2385   if (t == 1)
2386     {
2387       alg_out->ops = 1;
2388       alg_out->cost.cost = 0;
2389       alg_out->cost.latency = 0;
2390       alg_out->op[0] = alg_m;
2391       return;
2392     }
2393
2394   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2395      fail now.  */
2396   if (t == 0)
2397     {
2398       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2399         return;
2400       else
2401         {
2402           alg_out->ops = 1;
2403           alg_out->cost.cost = zero_cost (speed);
2404           alg_out->cost.latency = zero_cost (speed);
2405           alg_out->op[0] = alg_zero;
2406           return;
2407         }
2408     }
2409
2410   /* We'll be needing a couple extra algorithm structures now.  */
2411
2412   alg_in = XALLOCA (struct algorithm);
2413   best_alg = XALLOCA (struct algorithm);
2414   best_cost = *cost_limit;
2415
2416   /* Compute the hash index.  */
2417   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2418
2419   /* See if we already know what to do for T.  */
2420   entry_ptr = alg_hash_entry_ptr (hash_index);
2421   if (entry_ptr->t == t
2422       && entry_ptr->mode == mode
2423       && entry_ptr->mode == mode
2424       && entry_ptr->speed == speed
2425       && entry_ptr->alg != alg_unknown)
2426     {
2427       cache_alg = entry_ptr->alg;
2428
2429       if (cache_alg == alg_impossible)
2430         {
2431           /* The cache tells us that it's impossible to synthesize
2432              multiplication by T within entry_ptr->cost.  */
2433           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2434             /* COST_LIMIT is at least as restrictive as the one
2435                recorded in the hash table, in which case we have no
2436                hope of synthesizing a multiplication.  Just
2437                return.  */
2438             return;
2439
2440           /* If we get here, COST_LIMIT is less restrictive than the
2441              one recorded in the hash table, so we may be able to
2442              synthesize a multiplication.  Proceed as if we didn't
2443              have the cache entry.  */
2444         }
2445       else
2446         {
2447           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2448             /* The cached algorithm shows that this multiplication
2449                requires more cost than COST_LIMIT.  Just return.  This
2450                way, we don't clobber this cache entry with
2451                alg_impossible but retain useful information.  */
2452             return;
2453
2454           cache_hit = true;
2455
2456           switch (cache_alg)
2457             {
2458             case alg_shift:
2459               goto do_alg_shift;
2460
2461             case alg_add_t_m2:
2462             case alg_sub_t_m2:
2463               goto do_alg_addsub_t_m2;
2464
2465             case alg_add_factor:
2466             case alg_sub_factor:
2467               goto do_alg_addsub_factor;
2468
2469             case alg_add_t2_m:
2470               goto do_alg_add_t2_m;
2471
2472             case alg_sub_t2_m:
2473               goto do_alg_sub_t2_m;
2474
2475             default:
2476               gcc_unreachable ();
2477             }
2478         }
2479     }
2480
2481   /* If we have a group of zero bits at the low-order part of T, try
2482      multiplying by the remaining bits and then doing a shift.  */
2483
2484   if ((t & 1) == 0)
2485     {
2486     do_alg_shift:
2487       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2488       if (m < maxm)
2489         {
2490           q = t >> m;
2491           /* The function expand_shift will choose between a shift and
2492              a sequence of additions, so the observed cost is given as
2493              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2494           op_cost = m * add_cost (speed, mode);
2495           if (shift_cost (speed, mode, m) < op_cost)
2496             op_cost = shift_cost (speed, mode, m);
2497           new_limit.cost = best_cost.cost - op_cost;
2498           new_limit.latency = best_cost.latency - op_cost;
2499           synth_mult (alg_in, q, &new_limit, mode);
2500
2501           alg_in->cost.cost += op_cost;
2502           alg_in->cost.latency += op_cost;
2503           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2504             {
2505               struct algorithm *x;
2506               best_cost = alg_in->cost;
2507               x = alg_in, alg_in = best_alg, best_alg = x;
2508               best_alg->log[best_alg->ops] = m;
2509               best_alg->op[best_alg->ops] = alg_shift;
2510             }
2511
2512           /* See if treating ORIG_T as a signed number yields a better
2513              sequence.  Try this sequence only for a negative ORIG_T
2514              as it would be useless for a non-negative ORIG_T.  */
2515           if ((HOST_WIDE_INT) orig_t < 0)
2516             {
2517               /* Shift ORIG_T as follows because a right shift of a
2518                  negative-valued signed type is implementation
2519                  defined.  */
2520               q = ~(~orig_t >> m);
2521               /* The function expand_shift will choose between a shift
2522                  and a sequence of additions, so the observed cost is
2523                  given as MIN (m * add_cost(speed, mode),
2524                  shift_cost(speed, mode, m)).  */
2525               op_cost = m * add_cost (speed, mode);
2526               if (shift_cost (speed, mode, m) < op_cost)
2527                 op_cost = shift_cost (speed, mode, m);
2528               new_limit.cost = best_cost.cost - op_cost;
2529               new_limit.latency = best_cost.latency - op_cost;
2530               synth_mult (alg_in, q, &new_limit, mode);
2531
2532               alg_in->cost.cost += op_cost;
2533               alg_in->cost.latency += op_cost;
2534               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2535                 {
2536                   struct algorithm *x;
2537                   best_cost = alg_in->cost;
2538                   x = alg_in, alg_in = best_alg, best_alg = x;
2539                   best_alg->log[best_alg->ops] = m;
2540                   best_alg->op[best_alg->ops] = alg_shift;
2541                 }
2542             }
2543         }
2544       if (cache_hit)
2545         goto done;
2546     }
2547
2548   /* If we have an odd number, add or subtract one.  */
2549   if ((t & 1) != 0)
2550     {
2551       unsigned HOST_WIDE_INT w;
2552
2553     do_alg_addsub_t_m2:
2554       for (w = 1; (w & t) != 0; w <<= 1)
2555         ;
2556       /* If T was -1, then W will be zero after the loop.  This is another
2557          case where T ends with ...111.  Handling this with (T + 1) and
2558          subtract 1 produces slightly better code and results in algorithm
2559          selection much faster than treating it like the ...0111 case
2560          below.  */
2561       if (w == 0
2562           || (w > 2
2563               /* Reject the case where t is 3.
2564                  Thus we prefer addition in that case.  */
2565               && t != 3))
2566         {
2567           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2568
2569           op_cost = add_cost (speed, mode);
2570           new_limit.cost = best_cost.cost - op_cost;
2571           new_limit.latency = best_cost.latency - op_cost;
2572           synth_mult (alg_in, t + 1, &new_limit, mode);
2573
2574           alg_in->cost.cost += op_cost;
2575           alg_in->cost.latency += op_cost;
2576           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2577             {
2578               struct algorithm *x;
2579               best_cost = alg_in->cost;
2580               x = alg_in, alg_in = best_alg, best_alg = x;
2581               best_alg->log[best_alg->ops] = 0;
2582               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2583             }
2584         }
2585       else
2586         {
2587           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2588
2589           op_cost = add_cost (speed, mode);
2590           new_limit.cost = best_cost.cost - op_cost;
2591           new_limit.latency = best_cost.latency - op_cost;
2592           synth_mult (alg_in, t - 1, &new_limit, mode);
2593
2594           alg_in->cost.cost += op_cost;
2595           alg_in->cost.latency += op_cost;
2596           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2597             {
2598               struct algorithm *x;
2599               best_cost = alg_in->cost;
2600               x = alg_in, alg_in = best_alg, best_alg = x;
2601               best_alg->log[best_alg->ops] = 0;
2602               best_alg->op[best_alg->ops] = alg_add_t_m2;
2603             }
2604         }
2605
2606       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2607          quickly with a - a * n for some appropriate constant n.  */
2608       m = exact_log2 (-orig_t + 1);
2609       if (m >= 0 && m < maxm)
2610         {
2611           op_cost = shiftsub1_cost (speed, mode, m);
2612           new_limit.cost = best_cost.cost - op_cost;
2613           new_limit.latency = best_cost.latency - op_cost;
2614           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2615                       &new_limit, mode);
2616
2617           alg_in->cost.cost += op_cost;
2618           alg_in->cost.latency += op_cost;
2619           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2620             {
2621               struct algorithm *x;
2622               best_cost = alg_in->cost;
2623               x = alg_in, alg_in = best_alg, best_alg = x;
2624               best_alg->log[best_alg->ops] = m;
2625               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2626             }
2627         }
2628
2629       if (cache_hit)
2630         goto done;
2631     }
2632
2633   /* Look for factors of t of the form
2634      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2635      If we find such a factor, we can multiply by t using an algorithm that
2636      multiplies by q, shift the result by m and add/subtract it to itself.
2637
2638      We search for large factors first and loop down, even if large factors
2639      are less probable than small; if we find a large factor we will find a
2640      good sequence quickly, and therefore be able to prune (by decreasing
2641      COST_LIMIT) the search.  */
2642
2643  do_alg_addsub_factor:
2644   for (m = floor_log2 (t - 1); m >= 2; m--)
2645     {
2646       unsigned HOST_WIDE_INT d;
2647
2648       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2649       if (t % d == 0 && t > d && m < maxm
2650           && (!cache_hit || cache_alg == alg_add_factor))
2651         {
2652           /* If the target has a cheap shift-and-add instruction use
2653              that in preference to a shift insn followed by an add insn.
2654              Assume that the shift-and-add is "atomic" with a latency
2655              equal to its cost, otherwise assume that on superscalar
2656              hardware the shift may be executed concurrently with the
2657              earlier steps in the algorithm.  */
2658           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2659           if (shiftadd_cost (speed, mode, m) < op_cost)
2660             {
2661               op_cost = shiftadd_cost (speed, mode, m);
2662               op_latency = op_cost;
2663             }
2664           else
2665             op_latency = add_cost (speed, mode);
2666
2667           new_limit.cost = best_cost.cost - op_cost;
2668           new_limit.latency = best_cost.latency - op_latency;
2669           synth_mult (alg_in, t / d, &new_limit, mode);
2670
2671           alg_in->cost.cost += op_cost;
2672           alg_in->cost.latency += op_latency;
2673           if (alg_in->cost.latency < op_cost)
2674             alg_in->cost.latency = op_cost;
2675           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2676             {
2677               struct algorithm *x;
2678               best_cost = alg_in->cost;
2679               x = alg_in, alg_in = best_alg, best_alg = x;
2680               best_alg->log[best_alg->ops] = m;
2681               best_alg->op[best_alg->ops] = alg_add_factor;
2682             }
2683           /* Other factors will have been taken care of in the recursion.  */
2684           break;
2685         }
2686
2687       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2688       if (t % d == 0 && t > d && m < maxm
2689           && (!cache_hit || cache_alg == alg_sub_factor))
2690         {
2691           /* If the target has a cheap shift-and-subtract insn use
2692              that in preference to a shift insn followed by a sub insn.
2693              Assume that the shift-and-sub is "atomic" with a latency
2694              equal to it's cost, otherwise assume that on superscalar
2695              hardware the shift may be executed concurrently with the
2696              earlier steps in the algorithm.  */
2697           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2698           if (shiftsub0_cost (speed, mode, m) < op_cost)
2699             {
2700               op_cost = shiftsub0_cost (speed, mode, m);
2701               op_latency = op_cost;
2702             }
2703           else
2704             op_latency = add_cost (speed, mode);
2705
2706           new_limit.cost = best_cost.cost - op_cost;
2707           new_limit.latency = best_cost.latency - op_latency;
2708           synth_mult (alg_in, t / d, &new_limit, mode);
2709
2710           alg_in->cost.cost += op_cost;
2711           alg_in->cost.latency += op_latency;
2712           if (alg_in->cost.latency < op_cost)
2713             alg_in->cost.latency = op_cost;
2714           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2715             {
2716               struct algorithm *x;
2717               best_cost = alg_in->cost;
2718               x = alg_in, alg_in = best_alg, best_alg = x;
2719               best_alg->log[best_alg->ops] = m;
2720               best_alg->op[best_alg->ops] = alg_sub_factor;
2721             }
2722           break;
2723         }
2724     }
2725   if (cache_hit)
2726     goto done;
2727
2728   /* Try shift-and-add (load effective address) instructions,
2729      i.e. do a*3, a*5, a*9.  */
2730   if ((t & 1) != 0)
2731     {
2732     do_alg_add_t2_m:
2733       q = t - 1;
2734       q = q & -q;
2735       m = exact_log2 (q);
2736       if (m >= 0 && m < maxm)
2737         {
2738           op_cost = shiftadd_cost (speed, mode, m);
2739           new_limit.cost = best_cost.cost - op_cost;
2740           new_limit.latency = best_cost.latency - op_cost;
2741           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2742
2743           alg_in->cost.cost += op_cost;
2744           alg_in->cost.latency += op_cost;
2745           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2746             {
2747               struct algorithm *x;
2748               best_cost = alg_in->cost;
2749               x = alg_in, alg_in = best_alg, best_alg = x;
2750               best_alg->log[best_alg->ops] = m;
2751               best_alg->op[best_alg->ops] = alg_add_t2_m;
2752             }
2753         }
2754       if (cache_hit)
2755         goto done;
2756
2757     do_alg_sub_t2_m:
2758       q = t + 1;
2759       q = q & -q;
2760       m = exact_log2 (q);
2761       if (m >= 0 && m < maxm)
2762         {
2763           op_cost = shiftsub0_cost (speed, mode, m);
2764           new_limit.cost = best_cost.cost - op_cost;
2765           new_limit.latency = best_cost.latency - op_cost;
2766           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2767
2768           alg_in->cost.cost += op_cost;
2769           alg_in->cost.latency += op_cost;
2770           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2771             {
2772               struct algorithm *x;
2773               best_cost = alg_in->cost;
2774               x = alg_in, alg_in = best_alg, best_alg = x;
2775               best_alg->log[best_alg->ops] = m;
2776               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2777             }
2778         }
2779       if (cache_hit)
2780         goto done;
2781     }
2782
2783  done:
2784   /* If best_cost has not decreased, we have not found any algorithm.  */
2785   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2786     {
2787       /* We failed to find an algorithm.  Record alg_impossible for
2788          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2789          we are asked to find an algorithm for T within the same or
2790          lower COST_LIMIT, we can immediately return to the
2791          caller.  */
2792       entry_ptr->t = t;
2793       entry_ptr->mode = mode;
2794       entry_ptr->speed = speed;
2795       entry_ptr->alg = alg_impossible;
2796       entry_ptr->cost = *cost_limit;
2797       return;
2798     }
2799
2800   /* Cache the result.  */
2801   if (!cache_hit)
2802     {
2803       entry_ptr->t = t;
2804       entry_ptr->mode = mode;
2805       entry_ptr->speed = speed;
2806       entry_ptr->alg = best_alg->op[best_alg->ops];
2807       entry_ptr->cost.cost = best_cost.cost;
2808       entry_ptr->cost.latency = best_cost.latency;
2809     }
2810
2811   /* If we are getting a too long sequence for `struct algorithm'
2812      to record, make this search fail.  */
2813   if (best_alg->ops == MAX_BITS_PER_WORD)
2814     return;
2815
2816   /* Copy the algorithm from temporary space to the space at alg_out.
2817      We avoid using structure assignment because the majority of
2818      best_alg is normally undefined, and this is a critical function.  */
2819   alg_out->ops = best_alg->ops + 1;
2820   alg_out->cost = best_cost;
2821   memcpy (alg_out->op, best_alg->op,
2822           alg_out->ops * sizeof *alg_out->op);
2823   memcpy (alg_out->log, best_alg->log,
2824           alg_out->ops * sizeof *alg_out->log);
2825 }
2826 \f
2827 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2828    Try three variations:
2829
2830        - a shift/add sequence based on VAL itself
2831        - a shift/add sequence based on -VAL, followed by a negation
2832        - a shift/add sequence based on VAL - 1, followed by an addition.
2833
2834    Return true if the cheapest of these cost less than MULT_COST,
2835    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2836
2837 static bool
2838 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2839                      struct algorithm *alg, enum mult_variant *variant,
2840                      int mult_cost)
2841 {
2842   struct algorithm alg2;
2843   struct mult_cost limit;
2844   int op_cost;
2845   bool speed = optimize_insn_for_speed_p ();
2846
2847   /* Fail quickly for impossible bounds.  */
2848   if (mult_cost < 0)
2849     return false;
2850
2851   /* Ensure that mult_cost provides a reasonable upper bound.
2852      Any constant multiplication can be performed with less
2853      than 2 * bits additions.  */
2854   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2855   if (mult_cost > op_cost)
2856     mult_cost = op_cost;
2857
2858   *variant = basic_variant;
2859   limit.cost = mult_cost;
2860   limit.latency = mult_cost;
2861   synth_mult (alg, val, &limit, mode);
2862
2863   /* This works only if the inverted value actually fits in an
2864      `unsigned int' */
2865   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2866     {
2867       op_cost = neg_cost (speed, mode);
2868       if (MULT_COST_LESS (&alg->cost, mult_cost))
2869         {
2870           limit.cost = alg->cost.cost - op_cost;
2871           limit.latency = alg->cost.latency - op_cost;
2872         }
2873       else
2874         {
2875           limit.cost = mult_cost - op_cost;
2876           limit.latency = mult_cost - op_cost;
2877         }
2878
2879       synth_mult (&alg2, -val, &limit, mode);
2880       alg2.cost.cost += op_cost;
2881       alg2.cost.latency += op_cost;
2882       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2883         *alg = alg2, *variant = negate_variant;
2884     }
2885
2886   /* This proves very useful for division-by-constant.  */
2887   op_cost = add_cost (speed, mode);
2888   if (MULT_COST_LESS (&alg->cost, mult_cost))
2889     {
2890       limit.cost = alg->cost.cost - op_cost;
2891       limit.latency = alg->cost.latency - op_cost;
2892     }
2893   else
2894     {
2895       limit.cost = mult_cost - op_cost;
2896       limit.latency = mult_cost - op_cost;
2897     }
2898
2899   synth_mult (&alg2, val - 1, &limit, mode);
2900   alg2.cost.cost += op_cost;
2901   alg2.cost.latency += op_cost;
2902   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2903     *alg = alg2, *variant = add_variant;
2904
2905   return MULT_COST_LESS (&alg->cost, mult_cost);
2906 }
2907
2908 /* A subroutine of expand_mult, used for constant multiplications.
2909    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2910    convenient.  Use the shift/add sequence described by ALG and apply
2911    the final fixup specified by VARIANT.  */
2912
2913 static rtx
2914 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
2915                    rtx target, const struct algorithm *alg,
2916                    enum mult_variant variant)
2917 {
2918   HOST_WIDE_INT val_so_far;
2919   rtx_insn *insn;
2920   rtx accum, tem;
2921   int opno;
2922   machine_mode nmode;
2923
2924   /* Avoid referencing memory over and over and invalid sharing
2925      on SUBREGs.  */
2926   op0 = force_reg (mode, op0);
2927
2928   /* ACCUM starts out either as OP0 or as a zero, depending on
2929      the first operation.  */
2930
2931   if (alg->op[0] == alg_zero)
2932     {
2933       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2934       val_so_far = 0;
2935     }
2936   else if (alg->op[0] == alg_m)
2937     {
2938       accum = copy_to_mode_reg (mode, op0);
2939       val_so_far = 1;
2940     }
2941   else
2942     gcc_unreachable ();
2943
2944   for (opno = 1; opno < alg->ops; opno++)
2945     {
2946       int log = alg->log[opno];
2947       rtx shift_subtarget = optimize ? 0 : accum;
2948       rtx add_target
2949         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2950            && !optimize)
2951           ? target : 0;
2952       rtx accum_target = optimize ? 0 : accum;
2953       rtx accum_inner;
2954
2955       switch (alg->op[opno])
2956         {
2957         case alg_shift:
2958           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2959           /* REG_EQUAL note will be attached to the following insn.  */
2960           emit_move_insn (accum, tem);
2961           val_so_far <<= log;
2962           break;
2963
2964         case alg_add_t_m2:
2965           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2966           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2967                                  add_target ? add_target : accum_target);
2968           val_so_far += (HOST_WIDE_INT) 1 << log;
2969           break;
2970
2971         case alg_sub_t_m2:
2972           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2973           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2974                                  add_target ? add_target : accum_target);
2975           val_so_far -= (HOST_WIDE_INT) 1 << log;
2976           break;
2977
2978         case alg_add_t2_m:
2979           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2980                                 log, shift_subtarget, 0);
2981           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2982                                  add_target ? add_target : accum_target);
2983           val_so_far = (val_so_far << log) + 1;
2984           break;
2985
2986         case alg_sub_t2_m:
2987           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2988                                 log, shift_subtarget, 0);
2989           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2990                                  add_target ? add_target : accum_target);
2991           val_so_far = (val_so_far << log) - 1;
2992           break;
2993
2994         case alg_add_factor:
2995           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2996           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2997                                  add_target ? add_target : accum_target);
2998           val_so_far += val_so_far << log;
2999           break;
3000
3001         case alg_sub_factor:
3002           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3003           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3004                                  (add_target
3005                                   ? add_target : (optimize ? 0 : tem)));
3006           val_so_far = (val_so_far << log) - val_so_far;
3007           break;
3008
3009         default:
3010           gcc_unreachable ();
3011         }
3012
3013       if (SCALAR_INT_MODE_P (mode))
3014         {
3015           /* Write a REG_EQUAL note on the last insn so that we can cse
3016              multiplication sequences.  Note that if ACCUM is a SUBREG,
3017              we've set the inner register and must properly indicate that.  */
3018           tem = op0, nmode = mode;
3019           accum_inner = accum;
3020           if (GET_CODE (accum) == SUBREG)
3021             {
3022               accum_inner = SUBREG_REG (accum);
3023               nmode = GET_MODE (accum_inner);
3024               tem = gen_lowpart (nmode, op0);
3025             }
3026
3027           insn = get_last_insn ();
3028           set_dst_reg_note (insn, REG_EQUAL,
3029                             gen_rtx_MULT (nmode, tem,
3030                                           gen_int_mode (val_so_far, nmode)),
3031                             accum_inner);
3032         }
3033     }
3034
3035   if (variant == negate_variant)
3036     {
3037       val_so_far = -val_so_far;
3038       accum = expand_unop (mode, neg_optab, accum, target, 0);
3039     }
3040   else if (variant == add_variant)
3041     {
3042       val_so_far = val_so_far + 1;
3043       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3044     }
3045
3046   /* Compare only the bits of val and val_so_far that are significant
3047      in the result mode, to avoid sign-/zero-extension confusion.  */
3048   nmode = GET_MODE_INNER (mode);
3049   if (nmode == VOIDmode)
3050     nmode = mode;
3051   val &= GET_MODE_MASK (nmode);
3052   val_so_far &= GET_MODE_MASK (nmode);
3053   gcc_assert (val == val_so_far);
3054
3055   return accum;
3056 }
3057
3058 /* Perform a multiplication and return an rtx for the result.
3059    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3060    TARGET is a suggestion for where to store the result (an rtx).
3061
3062    We check specially for a constant integer as OP1.
3063    If you want this check for OP0 as well, then before calling
3064    you should swap the two operands if OP0 would be constant.  */
3065
3066 rtx
3067 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3068              int unsignedp)
3069 {
3070   enum mult_variant variant;
3071   struct algorithm algorithm;
3072   rtx scalar_op1;
3073   int max_cost;
3074   bool speed = optimize_insn_for_speed_p ();
3075   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3076
3077   if (CONSTANT_P (op0))
3078     {
3079       rtx temp = op0;
3080       op0 = op1;
3081       op1 = temp;
3082     }
3083
3084   /* For vectors, there are several simplifications that can be made if
3085      all elements of the vector constant are identical.  */
3086   scalar_op1 = op1;
3087   if (GET_CODE (op1) == CONST_VECTOR)
3088     {
3089       int i, n = CONST_VECTOR_NUNITS (op1);
3090       scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3091       for (i = 1; i < n; ++i)
3092         if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3093           goto skip_scalar;
3094     }
3095
3096   if (INTEGRAL_MODE_P (mode))
3097     {
3098       rtx fake_reg;
3099       HOST_WIDE_INT coeff;
3100       bool is_neg;
3101       int mode_bitsize;
3102
3103       if (op1 == CONST0_RTX (mode))
3104         return op1;
3105       if (op1 == CONST1_RTX (mode))
3106         return op0;
3107       if (op1 == CONSTM1_RTX (mode))
3108         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3109                             op0, target, 0);
3110
3111       if (do_trapv)
3112         goto skip_synth;
3113
3114       /* If mode is integer vector mode, check if the backend supports
3115          vector lshift (by scalar or vector) at all.  If not, we can't use
3116          synthetized multiply.  */
3117       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3118           && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3119           && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3120         goto skip_synth;
3121
3122       /* These are the operations that are potentially turned into
3123          a sequence of shifts and additions.  */
3124       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3125
3126       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3127          less than or equal in size to `unsigned int' this doesn't matter.
3128          If the mode is larger than `unsigned int', then synth_mult works
3129          only if the constant value exactly fits in an `unsigned int' without
3130          any truncation.  This means that multiplying by negative values does
3131          not work; results are off by 2^32 on a 32 bit machine.  */
3132       if (CONST_INT_P (scalar_op1))
3133         {
3134           coeff = INTVAL (scalar_op1);
3135           is_neg = coeff < 0;
3136         }
3137 #if TARGET_SUPPORTS_WIDE_INT
3138       else if (CONST_WIDE_INT_P (scalar_op1))
3139 #else
3140       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3141 #endif
3142         {
3143           int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3144           /* Perfect power of 2 (other than 1, which is handled above).  */
3145           if (shift > 0)
3146             return expand_shift (LSHIFT_EXPR, mode, op0,
3147                                  shift, target, unsignedp);
3148           else
3149             goto skip_synth;
3150         }
3151       else
3152         goto skip_synth;
3153
3154       /* We used to test optimize here, on the grounds that it's better to
3155          produce a smaller program when -O is not used.  But this causes
3156          such a terrible slowdown sometimes that it seems better to always
3157          use synth_mult.  */
3158
3159       /* Special case powers of two.  */
3160       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3161           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3162         return expand_shift (LSHIFT_EXPR, mode, op0,
3163                              floor_log2 (coeff), target, unsignedp);
3164
3165       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3166
3167       /* Attempt to handle multiplication of DImode values by negative
3168          coefficients, by performing the multiplication by a positive
3169          multiplier and then inverting the result.  */
3170       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3171         {
3172           /* Its safe to use -coeff even for INT_MIN, as the
3173              result is interpreted as an unsigned coefficient.
3174              Exclude cost of op0 from max_cost to match the cost
3175              calculation of the synth_mult.  */
3176           coeff = -(unsigned HOST_WIDE_INT) coeff;
3177           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3178                       - neg_cost (speed, mode));
3179           if (max_cost <= 0)
3180             goto skip_synth;
3181
3182           /* Special case powers of two.  */
3183           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3184             {
3185               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3186                                        floor_log2 (coeff), target, unsignedp);
3187               return expand_unop (mode, neg_optab, temp, target, 0);
3188             }
3189
3190           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3191                                    max_cost))
3192             {
3193               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3194                                             &algorithm, variant);
3195               return expand_unop (mode, neg_optab, temp, target, 0);
3196             }
3197           goto skip_synth;
3198         }
3199
3200       /* Exclude cost of op0 from max_cost to match the cost
3201          calculation of the synth_mult.  */
3202       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3203       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3204         return expand_mult_const (mode, op0, coeff, target,
3205                                   &algorithm, variant);
3206     }
3207  skip_synth:
3208
3209   /* Expand x*2.0 as x+x.  */
3210   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3211     {
3212       REAL_VALUE_TYPE d;
3213       REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3214
3215       if (REAL_VALUES_EQUAL (d, dconst2))
3216         {
3217           op0 = force_reg (GET_MODE (op0), op0);
3218           return expand_binop (mode, add_optab, op0, op0,
3219                                target, unsignedp, OPTAB_LIB_WIDEN);
3220         }
3221     }
3222  skip_scalar:
3223
3224   /* This used to use umul_optab if unsigned, but for non-widening multiply
3225      there is no difference between signed and unsigned.  */
3226   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3227                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3228   gcc_assert (op0);
3229   return op0;
3230 }
3231
3232 /* Return a cost estimate for multiplying a register by the given
3233    COEFFicient in the given MODE and SPEED.  */
3234
3235 int
3236 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3237 {
3238   int max_cost;
3239   struct algorithm algorithm;
3240   enum mult_variant variant;
3241
3242   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3243   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3244   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3245     return algorithm.cost.cost;
3246   else
3247     return max_cost;
3248 }
3249
3250 /* Perform a widening multiplication and return an rtx for the result.
3251    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3252    TARGET is a suggestion for where to store the result (an rtx).
3253    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3254    or smul_widen_optab.
3255
3256    We check specially for a constant integer as OP1, comparing the
3257    cost of a widening multiply against the cost of a sequence of shifts
3258    and adds.  */
3259
3260 rtx
3261 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3262                       int unsignedp, optab this_optab)
3263 {
3264   bool speed = optimize_insn_for_speed_p ();
3265   rtx cop1;
3266
3267   if (CONST_INT_P (op1)
3268       && GET_MODE (op0) != VOIDmode
3269       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3270                                 this_optab == umul_widen_optab))
3271       && CONST_INT_P (cop1)
3272       && (INTVAL (cop1) >= 0
3273           || HWI_COMPUTABLE_MODE_P (mode)))
3274     {
3275       HOST_WIDE_INT coeff = INTVAL (cop1);
3276       int max_cost;
3277       enum mult_variant variant;
3278       struct algorithm algorithm;
3279
3280       /* Special case powers of two.  */
3281       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3282         {
3283           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3284           return expand_shift (LSHIFT_EXPR, mode, op0,
3285                                floor_log2 (coeff), target, unsignedp);
3286         }
3287
3288       /* Exclude cost of op0 from max_cost to match the cost
3289          calculation of the synth_mult.  */
3290       max_cost = mul_widen_cost (speed, mode);
3291       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3292                                max_cost))
3293         {
3294           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3295           return expand_mult_const (mode, op0, coeff, target,
3296                                     &algorithm, variant);
3297         }
3298     }
3299   return expand_binop (mode, this_optab, op0, op1, target,
3300                        unsignedp, OPTAB_LIB_WIDEN);
3301 }
3302 \f
3303 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3304    replace division by D, and put the least significant N bits of the result
3305    in *MULTIPLIER_PTR and return the most significant bit.
3306
3307    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3308    needed precision is in PRECISION (should be <= N).
3309
3310    PRECISION should be as small as possible so this function can choose
3311    multiplier more freely.
3312
3313    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3314    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3315
3316    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3317    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3318
3319 unsigned HOST_WIDE_INT
3320 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3321                    unsigned HOST_WIDE_INT *multiplier_ptr,
3322                    int *post_shift_ptr, int *lgup_ptr)
3323 {
3324   int lgup, post_shift;
3325   int pow, pow2;
3326
3327   /* lgup = ceil(log2(divisor)); */
3328   lgup = ceil_log2 (d);
3329
3330   gcc_assert (lgup <= n);
3331
3332   pow = n + lgup;
3333   pow2 = n + lgup - precision;
3334
3335   /* mlow = 2^(N + lgup)/d */
3336   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3337   wide_int mlow = wi::udiv_trunc (val, d);
3338
3339   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3340   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3341   wide_int mhigh = wi::udiv_trunc (val, d);
3342
3343   /* If precision == N, then mlow, mhigh exceed 2^N
3344      (but they do not exceed 2^(N+1)).  */
3345
3346   /* Reduce to lowest terms.  */
3347   for (post_shift = lgup; post_shift > 0; post_shift--)
3348     {
3349       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3350                                                        HOST_BITS_PER_WIDE_INT);
3351       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3352                                                        HOST_BITS_PER_WIDE_INT);
3353       if (ml_lo >= mh_lo)
3354         break;
3355
3356       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3357       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3358     }
3359
3360   *post_shift_ptr = post_shift;
3361   *lgup_ptr = lgup;
3362   if (n < HOST_BITS_PER_WIDE_INT)
3363     {
3364       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3365       *multiplier_ptr = mhigh.to_uhwi () & mask;
3366       return mhigh.to_uhwi () >= mask;
3367     }
3368   else
3369     {
3370       *multiplier_ptr = mhigh.to_uhwi ();
3371       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3372     }
3373 }
3374
3375 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3376    congruent to 1 (mod 2**N).  */
3377
3378 static unsigned HOST_WIDE_INT
3379 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3380 {
3381   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3382
3383   /* The algorithm notes that the choice y = x satisfies
3384      x*y == 1 mod 2^3, since x is assumed odd.
3385      Each iteration doubles the number of bits of significance in y.  */
3386
3387   unsigned HOST_WIDE_INT mask;
3388   unsigned HOST_WIDE_INT y = x;
3389   int nbit = 3;
3390
3391   mask = (n == HOST_BITS_PER_WIDE_INT
3392           ? ~(unsigned HOST_WIDE_INT) 0
3393           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3394
3395   while (nbit < n)
3396     {
3397       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3398       nbit *= 2;
3399     }
3400   return y;
3401 }
3402
3403 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3404    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3405    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3406    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3407    become signed.
3408
3409    The result is put in TARGET if that is convenient.
3410
3411    MODE is the mode of operation.  */
3412
3413 rtx
3414 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3415                              rtx op1, rtx target, int unsignedp)
3416 {
3417   rtx tem;
3418   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3419
3420   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3421                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3422   tem = expand_and (mode, tem, op1, NULL_RTX);
3423   adj_operand
3424     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3425                      adj_operand);
3426
3427   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3428                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3429   tem = expand_and (mode, tem, op0, NULL_RTX);
3430   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3431                           target);
3432
3433   return target;
3434 }
3435
3436 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3437
3438 static rtx
3439 extract_high_half (machine_mode mode, rtx op)
3440 {
3441   machine_mode wider_mode;
3442
3443   if (mode == word_mode)
3444     return gen_highpart (mode, op);
3445
3446   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3447
3448   wider_mode = GET_MODE_WIDER_MODE (mode);
3449   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3450                      GET_MODE_BITSIZE (mode), 0, 1);
3451   return convert_modes (mode, wider_mode, op, 0);
3452 }
3453
3454 /* Like expmed_mult_highpart, but only consider using a multiplication
3455    optab.  OP1 is an rtx for the constant operand.  */
3456
3457 static rtx
3458 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3459                             rtx target, int unsignedp, int max_cost)
3460 {
3461   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3462   machine_mode wider_mode;
3463   optab moptab;
3464   rtx tem;
3465   int size;
3466   bool speed = optimize_insn_for_speed_p ();
3467
3468   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3469
3470   wider_mode = GET_MODE_WIDER_MODE (mode);
3471   size = GET_MODE_BITSIZE (mode);
3472
3473   /* Firstly, try using a multiplication insn that only generates the needed
3474      high part of the product, and in the sign flavor of unsignedp.  */
3475   if (mul_highpart_cost (speed, mode) < max_cost)
3476     {
3477       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3478       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3479                           unsignedp, OPTAB_DIRECT);
3480       if (tem)
3481         return tem;
3482     }
3483
3484   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3485      Need to adjust the result after the multiplication.  */
3486   if (size - 1 < BITS_PER_WORD
3487       && (mul_highpart_cost (speed, mode)
3488           + 2 * shift_cost (speed, mode, size-1)
3489           + 4 * add_cost (speed, mode) < max_cost))
3490     {
3491       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3492       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3493                           unsignedp, OPTAB_DIRECT);
3494       if (tem)
3495         /* We used the wrong signedness.  Adjust the result.  */
3496         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3497                                             tem, unsignedp);
3498     }
3499
3500   /* Try widening multiplication.  */
3501   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3502   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3503       && mul_widen_cost (speed, wider_mode) < max_cost)
3504     {
3505       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3506                           unsignedp, OPTAB_WIDEN);
3507       if (tem)
3508         return extract_high_half (mode, tem);
3509     }
3510
3511   /* Try widening the mode and perform a non-widening multiplication.  */
3512   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3513       && size - 1 < BITS_PER_WORD
3514       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3515           < max_cost))
3516     {
3517       rtx_insn *insns;
3518       rtx wop0, wop1;
3519
3520       /* We need to widen the operands, for example to ensure the
3521          constant multiplier is correctly sign or zero extended.
3522          Use a sequence to clean-up any instructions emitted by
3523          the conversions if things don't work out.  */
3524       start_sequence ();
3525       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3526       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3527       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3528                           unsignedp, OPTAB_WIDEN);
3529       insns = get_insns ();
3530       end_sequence ();
3531
3532       if (tem)
3533         {
3534           emit_insn (insns);
3535           return extract_high_half (mode, tem);
3536         }
3537     }
3538
3539   /* Try widening multiplication of opposite signedness, and adjust.  */
3540   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3541   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3542       && size - 1 < BITS_PER_WORD
3543       && (mul_widen_cost (speed, wider_mode)
3544           + 2 * shift_cost (speed, mode, size-1)
3545           + 4 * add_cost (speed, mode) < max_cost))
3546     {
3547       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3548                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3549       if (tem != 0)
3550         {
3551           tem = extract_high_half (mode, tem);
3552           /* We used the wrong signedness.  Adjust the result.  */
3553           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3554                                               target, unsignedp);
3555         }
3556     }
3557
3558   return 0;
3559 }
3560
3561 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3562    putting the high half of the result in TARGET if that is convenient,
3563    and return where the result is.  If the operation can not be performed,
3564    0 is returned.
3565
3566    MODE is the mode of operation and result.
3567
3568    UNSIGNEDP nonzero means unsigned multiply.
3569
3570    MAX_COST is the total allowed cost for the expanded RTL.  */
3571
3572 static rtx
3573 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3574                       rtx target, int unsignedp, int max_cost)
3575 {
3576   machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3577   unsigned HOST_WIDE_INT cnst1;
3578   int extra_cost;
3579   bool sign_adjust = false;
3580   enum mult_variant variant;
3581   struct algorithm alg;
3582   rtx tem;
3583   bool speed = optimize_insn_for_speed_p ();
3584
3585   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3586   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3587   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3588
3589   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3590
3591   /* We can't optimize modes wider than BITS_PER_WORD.
3592      ??? We might be able to perform double-word arithmetic if
3593      mode == word_mode, however all the cost calculations in
3594      synth_mult etc. assume single-word operations.  */
3595   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3596     return expmed_mult_highpart_optab (mode, op0, op1, target,
3597                                        unsignedp, max_cost);
3598
3599   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3600
3601   /* Check whether we try to multiply by a negative constant.  */
3602   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3603     {
3604       sign_adjust = true;
3605       extra_cost += add_cost (speed, mode);
3606     }
3607
3608   /* See whether shift/add multiplication is cheap enough.  */
3609   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3610                            max_cost - extra_cost))
3611     {
3612       /* See whether the specialized multiplication optabs are
3613          cheaper than the shift/add version.  */
3614       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3615                                         alg.cost.cost + extra_cost);
3616       if (tem)
3617         return tem;
3618
3619       tem = convert_to_mode (wider_mode, op0, unsignedp);
3620       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3621       tem = extract_high_half (mode, tem);
3622
3623       /* Adjust result for signedness.  */
3624       if (sign_adjust)
3625         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3626
3627       return tem;
3628     }
3629   return expmed_mult_highpart_optab (mode, op0, op1, target,
3630                                      unsignedp, max_cost);
3631 }
3632
3633
3634 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3635
3636 static rtx
3637 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3638 {
3639   rtx result, temp, shift;
3640   rtx_code_label *label;
3641   int logd;
3642   int prec = GET_MODE_PRECISION (mode);
3643
3644   logd = floor_log2 (d);
3645   result = gen_reg_rtx (mode);
3646
3647   /* Avoid conditional branches when they're expensive.  */
3648   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3649       && optimize_insn_for_speed_p ())
3650     {
3651       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3652                                       mode, 0, -1);
3653       if (signmask)
3654         {
3655           HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3656           signmask = force_reg (mode, signmask);
3657           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3658
3659           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3660              which instruction sequence to use.  If logical right shifts
3661              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3662              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3663
3664           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3665           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3666               || (set_src_cost (temp, optimize_insn_for_speed_p ())
3667                   > COSTS_N_INSNS (2)))
3668             {
3669               temp = expand_binop (mode, xor_optab, op0, signmask,
3670                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3671               temp = expand_binop (mode, sub_optab, temp, signmask,
3672                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3673               temp = expand_binop (mode, and_optab, temp,
3674                                    gen_int_mode (masklow, mode),
3675                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3676               temp = expand_binop (mode, xor_optab, temp, signmask,
3677                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3678               temp = expand_binop (mode, sub_optab, temp, signmask,
3679                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3680             }
3681           else
3682             {
3683               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3684                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3685               signmask = force_reg (mode, signmask);
3686
3687               temp = expand_binop (mode, add_optab, op0, signmask,
3688                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3689               temp = expand_binop (mode, and_optab, temp,
3690                                    gen_int_mode (masklow, mode),
3691                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3692               temp = expand_binop (mode, sub_optab, temp, signmask,
3693                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3694             }
3695           return temp;
3696         }
3697     }
3698
3699   /* Mask contains the mode's signbit and the significant bits of the
3700      modulus.  By including the signbit in the operation, many targets
3701      can avoid an explicit compare operation in the following comparison
3702      against zero.  */
3703   wide_int mask = wi::mask (logd, false, prec);
3704   mask = wi::set_bit (mask, prec - 1);
3705
3706   temp = expand_binop (mode, and_optab, op0,
3707                        immed_wide_int_const (mask, mode),
3708                        result, 1, OPTAB_LIB_WIDEN);
3709   if (temp != result)
3710     emit_move_insn (result, temp);
3711
3712   label = gen_label_rtx ();
3713   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3714
3715   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3716                        0, OPTAB_LIB_WIDEN);
3717
3718   mask = wi::mask (logd, true, prec);
3719   temp = expand_binop (mode, ior_optab, temp,
3720                        immed_wide_int_const (mask, mode),
3721                        result, 1, OPTAB_LIB_WIDEN);
3722   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3723                        0, OPTAB_LIB_WIDEN);
3724   if (temp != result)
3725     emit_move_insn (result, temp);
3726   emit_label (label);
3727   return result;
3728 }
3729
3730 /* Expand signed division of OP0 by a power of two D in mode MODE.
3731    This routine is only called for positive values of D.  */
3732
3733 static rtx
3734 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3735 {
3736   rtx temp;
3737   rtx_code_label *label;
3738   int logd;
3739
3740   logd = floor_log2 (d);
3741
3742   if (d == 2
3743       && BRANCH_COST (optimize_insn_for_speed_p (),
3744                       false) >= 1)
3745     {
3746       temp = gen_reg_rtx (mode);
3747       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3748       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3749                            0, OPTAB_LIB_WIDEN);
3750       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3751     }
3752
3753 #ifdef HAVE_conditional_move
3754   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3755       >= 2)
3756     {
3757       rtx temp2;
3758
3759       start_sequence ();
3760       temp2 = copy_to_mode_reg (mode, op0);
3761       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3762                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3763       temp = force_reg (mode, temp);
3764
3765       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3766       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3767                                      mode, temp, temp2, mode, 0);
3768       if (temp2)
3769         {
3770           rtx_insn *seq = get_insns ();
3771           end_sequence ();
3772           emit_insn (seq);
3773           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3774         }
3775       end_sequence ();
3776     }
3777 #endif
3778
3779   if (BRANCH_COST (optimize_insn_for_speed_p (),
3780                    false) >= 2)
3781     {
3782       int ushift = GET_MODE_BITSIZE (mode) - logd;
3783
3784       temp = gen_reg_rtx (mode);
3785       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3786       if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3787           || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3788              > COSTS_N_INSNS (1))
3789         temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3790                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3791       else
3792         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3793                              ushift, NULL_RTX, 1);
3794       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3795                            0, OPTAB_LIB_WIDEN);
3796       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3797     }
3798
3799   label = gen_label_rtx ();
3800   temp = copy_to_mode_reg (mode, op0);
3801   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3802   expand_inc (temp, gen_int_mode (d - 1, mode));
3803   emit_label (label);
3804   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3805 }
3806 \f
3807 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3808    if that is convenient, and returning where the result is.
3809    You may request either the quotient or the remainder as the result;
3810    specify REM_FLAG nonzero to get the remainder.
3811
3812    CODE is the expression code for which kind of division this is;
3813    it controls how rounding is done.  MODE is the machine mode to use.
3814    UNSIGNEDP nonzero means do unsigned division.  */
3815
3816 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3817    and then correct it by or'ing in missing high bits
3818    if result of ANDI is nonzero.
3819    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3820    This could optimize to a bfexts instruction.
3821    But C doesn't use these operations, so their optimizations are
3822    left for later.  */
3823 /* ??? For modulo, we don't actually need the highpart of the first product,
3824    the low part will do nicely.  And for small divisors, the second multiply
3825    can also be a low-part only multiply or even be completely left out.
3826    E.g. to calculate the remainder of a division by 3 with a 32 bit
3827    multiply, multiply with 0x55555556 and extract the upper two bits;
3828    the result is exact for inputs up to 0x1fffffff.
3829    The input range can be reduced by using cross-sum rules.
3830    For odd divisors >= 3, the following table gives right shift counts
3831    so that if a number is shifted by an integer multiple of the given
3832    amount, the remainder stays the same:
3833    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3834    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3835    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3836    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3837    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3838
3839    Cross-sum rules for even numbers can be derived by leaving as many bits
3840    to the right alone as the divisor has zeros to the right.
3841    E.g. if x is an unsigned 32 bit number:
3842    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3843    */
3844
3845 rtx
3846 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3847                rtx op0, rtx op1, rtx target, int unsignedp)
3848 {
3849   machine_mode compute_mode;
3850   rtx tquotient;
3851   rtx quotient = 0, remainder = 0;
3852   rtx_insn *last;
3853   int size;
3854   rtx_insn *insn;
3855   optab optab1, optab2;
3856   int op1_is_constant, op1_is_pow2 = 0;
3857   int max_cost, extra_cost;
3858   static HOST_WIDE_INT last_div_const = 0;
3859   bool speed = optimize_insn_for_speed_p ();
3860
3861   op1_is_constant = CONST_INT_P (op1);
3862   if (op1_is_constant)
3863     {
3864       unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3865       if (unsignedp)
3866         ext_op1 &= GET_MODE_MASK (mode);
3867       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3868                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3869     }
3870
3871   /*
3872      This is the structure of expand_divmod:
3873
3874      First comes code to fix up the operands so we can perform the operations
3875      correctly and efficiently.
3876
3877      Second comes a switch statement with code specific for each rounding mode.
3878      For some special operands this code emits all RTL for the desired
3879      operation, for other cases, it generates only a quotient and stores it in
3880      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3881      to indicate that it has not done anything.
3882
3883      Last comes code that finishes the operation.  If QUOTIENT is set and
3884      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3885      QUOTIENT is not set, it is computed using trunc rounding.
3886
3887      We try to generate special code for division and remainder when OP1 is a
3888      constant.  If |OP1| = 2**n we can use shifts and some other fast
3889      operations.  For other values of OP1, we compute a carefully selected
3890      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3891      by m.
3892
3893      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3894      half of the product.  Different strategies for generating the product are
3895      implemented in expmed_mult_highpart.
3896
3897      If what we actually want is the remainder, we generate that by another
3898      by-constant multiplication and a subtraction.  */
3899
3900   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3901      code below will malfunction if we are, so check here and handle
3902      the special case if so.  */
3903   if (op1 == const1_rtx)
3904     return rem_flag ? const0_rtx : op0;
3905
3906     /* When dividing by -1, we could get an overflow.
3907      negv_optab can handle overflows.  */
3908   if (! unsignedp && op1 == constm1_rtx)
3909     {
3910       if (rem_flag)
3911         return const0_rtx;
3912       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3913                           ? negv_optab : neg_optab, op0, target, 0);
3914     }
3915
3916   if (target
3917       /* Don't use the function value register as a target
3918          since we have to read it as well as write it,
3919          and function-inlining gets confused by this.  */
3920       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3921           /* Don't clobber an operand while doing a multi-step calculation.  */
3922           || ((rem_flag || op1_is_constant)
3923               && (reg_mentioned_p (target, op0)
3924                   || (MEM_P (op0) && MEM_P (target))))
3925           || reg_mentioned_p (target, op1)
3926           || (MEM_P (op1) && MEM_P (target))))
3927     target = 0;
3928
3929   /* Get the mode in which to perform this computation.  Normally it will
3930      be MODE, but sometimes we can't do the desired operation in MODE.
3931      If so, pick a wider mode in which we can do the operation.  Convert
3932      to that mode at the start to avoid repeated conversions.
3933
3934      First see what operations we need.  These depend on the expression
3935      we are evaluating.  (We assume that divxx3 insns exist under the
3936      same conditions that modxx3 insns and that these insns don't normally
3937      fail.  If these assumptions are not correct, we may generate less
3938      efficient code in some cases.)
3939
3940      Then see if we find a mode in which we can open-code that operation
3941      (either a division, modulus, or shift).  Finally, check for the smallest
3942      mode for which we can do the operation with a library call.  */
3943
3944   /* We might want to refine this now that we have division-by-constant
3945      optimization.  Since expmed_mult_highpart tries so many variants, it is
3946      not straightforward to generalize this.  Maybe we should make an array
3947      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3948
3949   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3950             ? (unsignedp ? lshr_optab : ashr_optab)
3951             : (unsignedp ? udiv_optab : sdiv_optab));
3952   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3953             ? optab1
3954             : (unsignedp ? udivmod_optab : sdivmod_optab));
3955
3956   for (compute_mode = mode; compute_mode != VOIDmode;
3957        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3958     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3959         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3960       break;
3961
3962   if (compute_mode == VOIDmode)
3963     for (compute_mode = mode; compute_mode != VOIDmode;
3964          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3965       if (optab_libfunc (optab1, compute_mode)
3966           || optab_libfunc (optab2, compute_mode))
3967         break;
3968
3969   /* If we still couldn't find a mode, use MODE, but expand_binop will
3970      probably die.  */
3971   if (compute_mode == VOIDmode)
3972     compute_mode = mode;
3973
3974   if (target && GET_MODE (target) == compute_mode)
3975     tquotient = target;
3976   else
3977     tquotient = gen_reg_rtx (compute_mode);
3978
3979   size = GET_MODE_BITSIZE (compute_mode);
3980 #if 0
3981   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3982      (mode), and thereby get better code when OP1 is a constant.  Do that
3983      later.  It will require going over all usages of SIZE below.  */
3984   size = GET_MODE_BITSIZE (mode);
3985 #endif
3986
3987   /* Only deduct something for a REM if the last divide done was
3988      for a different constant.   Then set the constant of the last
3989      divide.  */
3990   max_cost = (unsignedp
3991               ? udiv_cost (speed, compute_mode)
3992               : sdiv_cost (speed, compute_mode));
3993   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3994                      && INTVAL (op1) == last_div_const))
3995     max_cost -= (mul_cost (speed, compute_mode)
3996                  + add_cost (speed, compute_mode));
3997
3998   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3999
4000   /* Now convert to the best mode to use.  */
4001   if (compute_mode != mode)
4002     {
4003       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4004       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4005
4006       /* convert_modes may have placed op1 into a register, so we
4007          must recompute the following.  */
4008       op1_is_constant = CONST_INT_P (op1);
4009       op1_is_pow2 = (op1_is_constant
4010                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4011                           || (! unsignedp
4012                               && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4013     }
4014
4015   /* If one of the operands is a volatile MEM, copy it into a register.  */
4016
4017   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4018     op0 = force_reg (compute_mode, op0);
4019   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4020     op1 = force_reg (compute_mode, op1);
4021
4022   /* If we need the remainder or if OP1 is constant, we need to
4023      put OP0 in a register in case it has any queued subexpressions.  */
4024   if (rem_flag || op1_is_constant)
4025     op0 = force_reg (compute_mode, op0);
4026
4027   last = get_last_insn ();
4028
4029   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4030   if (unsignedp)
4031     {
4032       if (code == FLOOR_DIV_EXPR)
4033         code = TRUNC_DIV_EXPR;
4034       if (code == FLOOR_MOD_EXPR)
4035         code = TRUNC_MOD_EXPR;
4036       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4037         code = TRUNC_DIV_EXPR;
4038     }
4039
4040   if (op1 != const0_rtx)
4041     switch (code)
4042       {
4043       case TRUNC_MOD_EXPR:
4044       case TRUNC_DIV_EXPR:
4045         if (op1_is_constant)
4046           {
4047             if (unsignedp)
4048               {
4049                 unsigned HOST_WIDE_INT mh, ml;
4050                 int pre_shift, post_shift;
4051                 int dummy;
4052                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4053                                             & GET_MODE_MASK (compute_mode));
4054
4055                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4056                   {
4057                     pre_shift = floor_log2 (d);
4058                     if (rem_flag)
4059                       {
4060                         unsigned HOST_WIDE_INT mask
4061                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4062                         remainder
4063                           = expand_binop (compute_mode, and_optab, op0,
4064                                           gen_int_mode (mask, compute_mode),
4065                                           remainder, 1,
4066                                           OPTAB_LIB_WIDEN);
4067                         if (remainder)
4068                           return gen_lowpart (mode, remainder);
4069                       }
4070                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4071                                              pre_shift, tquotient, 1);
4072                   }
4073                 else if (size <= HOST_BITS_PER_WIDE_INT)
4074                   {
4075                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4076                       {
4077                         /* Most significant bit of divisor is set; emit an scc
4078                            insn.  */
4079                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4080                                                           compute_mode, 1, 1);
4081                       }
4082                     else
4083                       {
4084                         /* Find a suitable multiplier and right shift count
4085                            instead of multiplying with D.  */
4086
4087                         mh = choose_multiplier (d, size, size,
4088                                                 &ml, &post_shift, &dummy);
4089
4090                         /* If the suggested multiplier is more than SIZE bits,
4091                            we can do better for even divisors, using an
4092                            initial right shift.  */
4093                         if (mh != 0 && (d & 1) == 0)
4094                           {
4095                             pre_shift = floor_log2 (d & -d);
4096                             mh = choose_multiplier (d >> pre_shift, size,
4097                                                     size - pre_shift,
4098                                                     &ml, &post_shift, &dummy);
4099                             gcc_assert (!mh);
4100                           }
4101                         else
4102                           pre_shift = 0;
4103
4104                         if (mh != 0)
4105                           {
4106                             rtx t1, t2, t3, t4;
4107
4108                             if (post_shift - 1 >= BITS_PER_WORD)
4109                               goto fail1;
4110
4111                             extra_cost
4112                               = (shift_cost (speed, compute_mode, post_shift - 1)
4113                                  + shift_cost (speed, compute_mode, 1)
4114                                  + 2 * add_cost (speed, compute_mode));
4115                             t1 = expmed_mult_highpart
4116                               (compute_mode, op0,
4117                                gen_int_mode (ml, compute_mode),
4118                                NULL_RTX, 1, max_cost - extra_cost);
4119                             if (t1 == 0)
4120                               goto fail1;
4121                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4122                                                                op0, t1),
4123                                                 NULL_RTX);
4124                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4125                                                t2, 1, NULL_RTX, 1);
4126                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4127                                                               t1, t3),
4128                                                 NULL_RTX);
4129                             quotient = expand_shift
4130                               (RSHIFT_EXPR, compute_mode, t4,
4131                                post_shift - 1, tquotient, 1);
4132                           }
4133                         else
4134                           {
4135                             rtx t1, t2;
4136
4137                             if (pre_shift >= BITS_PER_WORD
4138                                 || post_shift >= BITS_PER_WORD)
4139                               goto fail1;
4140
4141                             t1 = expand_shift
4142                               (RSHIFT_EXPR, compute_mode, op0,
4143                                pre_shift, NULL_RTX, 1);
4144                             extra_cost
4145                               = (shift_cost (speed, compute_mode, pre_shift)
4146                                  + shift_cost (speed, compute_mode, post_shift));
4147                             t2 = expmed_mult_highpart
4148                               (compute_mode, t1,
4149                                gen_int_mode (ml, compute_mode),
4150                                NULL_RTX, 1, max_cost - extra_cost);
4151                             if (t2 == 0)
4152                               goto fail1;
4153                             quotient = expand_shift
4154                               (RSHIFT_EXPR, compute_mode, t2,
4155                                post_shift, tquotient, 1);
4156                           }
4157                       }
4158                   }
4159                 else            /* Too wide mode to use tricky code */
4160                   break;
4161
4162                 insn = get_last_insn ();
4163                 if (insn != last)
4164                   set_dst_reg_note (insn, REG_EQUAL,
4165                                     gen_rtx_UDIV (compute_mode, op0, op1),
4166                                     quotient);
4167               }
4168             else                /* TRUNC_DIV, signed */
4169               {
4170                 unsigned HOST_WIDE_INT ml;
4171                 int lgup, post_shift;
4172                 rtx mlr;
4173                 HOST_WIDE_INT d = INTVAL (op1);
4174                 unsigned HOST_WIDE_INT abs_d;
4175
4176                 /* Since d might be INT_MIN, we have to cast to
4177                    unsigned HOST_WIDE_INT before negating to avoid
4178                    undefined signed overflow.  */
4179                 abs_d = (d >= 0
4180                          ? (unsigned HOST_WIDE_INT) d
4181                          : - (unsigned HOST_WIDE_INT) d);
4182
4183                 /* n rem d = n rem -d */
4184                 if (rem_flag && d < 0)
4185                   {
4186                     d = abs_d;
4187                     op1 = gen_int_mode (abs_d, compute_mode);
4188                   }
4189
4190                 if (d == 1)
4191                   quotient = op0;
4192                 else if (d == -1)
4193                   quotient = expand_unop (compute_mode, neg_optab, op0,
4194                                           tquotient, 0);
4195                 else if (HOST_BITS_PER_WIDE_INT >= size
4196                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4197                   {
4198                     /* This case is not handled correctly below.  */
4199                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4200                                                 compute_mode, 1, 1);
4201                     if (quotient == 0)
4202                       goto fail1;
4203                   }
4204                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4205                          && (rem_flag
4206                              ? smod_pow2_cheap (speed, compute_mode)
4207                              : sdiv_pow2_cheap (speed, compute_mode))
4208                          /* We assume that cheap metric is true if the
4209                             optab has an expander for this mode.  */
4210                          && ((optab_handler ((rem_flag ? smod_optab
4211                                               : sdiv_optab),
4212                                              compute_mode)
4213                               != CODE_FOR_nothing)
4214                              || (optab_handler (sdivmod_optab,
4215                                                 compute_mode)
4216                                  != CODE_FOR_nothing)))
4217                   ;
4218                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4219                   {
4220                     if (rem_flag)
4221                       {
4222                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4223                         if (remainder)
4224                           return gen_lowpart (mode, remainder);
4225                       }
4226
4227                     if (sdiv_pow2_cheap (speed, compute_mode)
4228                         && ((optab_handler (sdiv_optab, compute_mode)
4229                              != CODE_FOR_nothing)
4230                             || (optab_handler (sdivmod_optab, compute_mode)
4231                                 != CODE_FOR_nothing)))
4232                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4233                                                 compute_mode, op0,
4234                                                 gen_int_mode (abs_d,
4235                                                               compute_mode),
4236                                                 NULL_RTX, 0);
4237                     else
4238                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4239
4240                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4241                        negate the quotient.  */
4242                     if (d < 0)
4243                       {
4244                         insn = get_last_insn ();
4245                         if (insn != last
4246                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4247                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4248                           set_dst_reg_note (insn, REG_EQUAL,
4249                                             gen_rtx_DIV (compute_mode, op0,
4250                                                          gen_int_mode
4251                                                            (abs_d,
4252                                                             compute_mode)),
4253                                             quotient);
4254
4255                         quotient = expand_unop (compute_mode, neg_optab,
4256                                                 quotient, quotient, 0);
4257                       }
4258                   }
4259                 else if (size <= HOST_BITS_PER_WIDE_INT)
4260                   {
4261                     choose_multiplier (abs_d, size, size - 1,
4262                                        &ml, &post_shift, &lgup);
4263                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4264                       {
4265                         rtx t1, t2, t3;
4266
4267                         if (post_shift >= BITS_PER_WORD
4268                             || size - 1 >= BITS_PER_WORD)
4269                           goto fail1;
4270
4271                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4272                                       + shift_cost (speed, compute_mode, size - 1)
4273                                       + add_cost (speed, compute_mode));
4274                         t1 = expmed_mult_highpart
4275                           (compute_mode, op0, gen_int_mode (ml, compute_mode),
4276                            NULL_RTX, 0, max_cost - extra_cost);
4277                         if (t1 == 0)
4278                           goto fail1;
4279                         t2 = expand_shift
4280                           (RSHIFT_EXPR, compute_mode, t1,
4281                            post_shift, NULL_RTX, 0);
4282                         t3 = expand_shift
4283                           (RSHIFT_EXPR, compute_mode, op0,
4284                            size - 1, NULL_RTX, 0);
4285                         if (d < 0)
4286                           quotient
4287                             = force_operand (gen_rtx_MINUS (compute_mode,
4288                                                             t3, t2),
4289                                              tquotient);
4290                         else
4291                           quotient
4292                             = force_operand (gen_rtx_MINUS (compute_mode,
4293                                                             t2, t3),
4294                                              tquotient);
4295                       }
4296                     else
4297                       {
4298                         rtx t1, t2, t3, t4;
4299
4300                         if (post_shift >= BITS_PER_WORD
4301                             || size - 1 >= BITS_PER_WORD)
4302                           goto fail1;
4303
4304                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4305                         mlr = gen_int_mode (ml, compute_mode);
4306                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4307                                       + shift_cost (speed, compute_mode, size - 1)
4308                                       + 2 * add_cost (speed, compute_mode));
4309                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4310                                                    NULL_RTX, 0,
4311                                                    max_cost - extra_cost);
4312                         if (t1 == 0)
4313                           goto fail1;
4314                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4315                                                           t1, op0),
4316                                             NULL_RTX);
4317                         t3 = expand_shift
4318                           (RSHIFT_EXPR, compute_mode, t2,
4319                            post_shift, NULL_RTX, 0);
4320                         t4 = expand_shift
4321                           (RSHIFT_EXPR, compute_mode, op0,
4322                            size - 1, NULL_RTX, 0);
4323                         if (d < 0)
4324                           quotient
4325                             = force_operand (gen_rtx_MINUS (compute_mode,
4326                                                             t4, t3),
4327                                              tquotient);
4328                         else
4329                           quotient
4330                             = force_operand (gen_rtx_MINUS (compute_mode,
4331                                                             t3, t4),
4332                                              tquotient);
4333                       }
4334                   }
4335                 else            /* Too wide mode to use tricky code */
4336                   break;
4337
4338                 insn = get_last_insn ();
4339                 if (insn != last)
4340                   set_dst_reg_note (insn, REG_EQUAL,
4341                                     gen_rtx_DIV (compute_mode, op0, op1),
4342                                     quotient);
4343               }
4344             break;
4345           }
4346       fail1:
4347         delete_insns_since (last);
4348         break;
4349
4350       case FLOOR_DIV_EXPR:
4351       case FLOOR_MOD_EXPR:
4352       /* We will come here only for signed operations.  */
4353         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4354           {
4355             unsigned HOST_WIDE_INT mh, ml;
4356             int pre_shift, lgup, post_shift;
4357             HOST_WIDE_INT d = INTVAL (op1);
4358
4359             if (d > 0)
4360               {
4361                 /* We could just as easily deal with negative constants here,
4362                    but it does not seem worth the trouble for GCC 2.6.  */
4363                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4364                   {
4365                     pre_shift = floor_log2 (d);
4366                     if (rem_flag)
4367                       {
4368                         unsigned HOST_WIDE_INT mask
4369                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4370                         remainder = expand_binop
4371                           (compute_mode, and_optab, op0,
4372                            gen_int_mode (mask, compute_mode),
4373                            remainder, 0, OPTAB_LIB_WIDEN);
4374                         if (remainder)
4375                           return gen_lowpart (mode, remainder);
4376                       }
4377                     quotient = expand_shift
4378                       (RSHIFT_EXPR, compute_mode, op0,
4379                        pre_shift, tquotient, 0);
4380                   }
4381                 else
4382                   {
4383                     rtx t1, t2, t3, t4;
4384
4385                     mh = choose_multiplier (d, size, size - 1,
4386                                             &ml, &post_shift, &lgup);
4387                     gcc_assert (!mh);
4388
4389                     if (post_shift < BITS_PER_WORD
4390                         && size - 1 < BITS_PER_WORD)
4391                       {
4392                         t1 = expand_shift
4393                           (RSHIFT_EXPR, compute_mode, op0,
4394                            size - 1, NULL_RTX, 0);
4395                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4396                                            NULL_RTX, 0, OPTAB_WIDEN);
4397                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4398                                       + shift_cost (speed, compute_mode, size - 1)
4399                                       + 2 * add_cost (speed, compute_mode));
4400                         t3 = expmed_mult_highpart
4401                           (compute_mode, t2, gen_int_mode (ml, compute_mode),
4402                            NULL_RTX, 1, max_cost - extra_cost);
4403                         if (t3 != 0)
4404                           {
4405                             t4 = expand_shift
4406                               (RSHIFT_EXPR, compute_mode, t3,
4407                                post_shift, NULL_RTX, 1);
4408                             quotient = expand_binop (compute_mode, xor_optab,
4409                                                      t4, t1, tquotient, 0,
4410                                                      OPTAB_WIDEN);
4411                           }
4412                       }
4413                   }
4414               }
4415             else
4416               {
4417                 rtx nsign, t1, t2, t3, t4;
4418                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4419                                                   op0, constm1_rtx), NULL_RTX);
4420                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4421                                    0, OPTAB_WIDEN);
4422                 nsign = expand_shift
4423                   (RSHIFT_EXPR, compute_mode, t2,
4424                    size - 1, NULL_RTX, 0);
4425                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4426                                     NULL_RTX);
4427                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4428                                     NULL_RTX, 0);
4429                 if (t4)
4430                   {
4431                     rtx t5;
4432                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4433                                       NULL_RTX, 0);
4434                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4435                                                             t4, t5),
4436                                               tquotient);
4437                   }
4438               }
4439           }
4440
4441         if (quotient != 0)
4442           break;
4443         delete_insns_since (last);
4444
4445         /* Try using an instruction that produces both the quotient and
4446            remainder, using truncation.  We can easily compensate the quotient
4447            or remainder to get floor rounding, once we have the remainder.
4448            Notice that we compute also the final remainder value here,
4449            and return the result right away.  */
4450         if (target == 0 || GET_MODE (target) != compute_mode)
4451           target = gen_reg_rtx (compute_mode);
4452
4453         if (rem_flag)
4454           {
4455             remainder
4456               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4457             quotient = gen_reg_rtx (compute_mode);
4458           }
4459         else
4460           {
4461             quotient
4462               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4463             remainder = gen_reg_rtx (compute_mode);
4464           }
4465
4466         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4467                                  quotient, remainder, 0))
4468           {
4469             /* This could be computed with a branch-less sequence.
4470                Save that for later.  */
4471             rtx tem;
4472             rtx_code_label *label = gen_label_rtx ();
4473             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4474             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4475                                 NULL_RTX, 0, OPTAB_WIDEN);
4476             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4477             expand_dec (quotient, const1_rtx);
4478             expand_inc (remainder, op1);
4479             emit_label (label);
4480             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4481           }
4482
4483         /* No luck with division elimination or divmod.  Have to do it
4484            by conditionally adjusting op0 *and* the result.  */
4485         {
4486           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4487           rtx adjusted_op0;
4488           rtx tem;
4489
4490           quotient = gen_reg_rtx (compute_mode);
4491           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4492           label1 = gen_label_rtx ();
4493           label2 = gen_label_rtx ();
4494           label3 = gen_label_rtx ();
4495           label4 = gen_label_rtx ();
4496           label5 = gen_label_rtx ();
4497           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4498           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4499           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4500                               quotient, 0, OPTAB_LIB_WIDEN);
4501           if (tem != quotient)
4502             emit_move_insn (quotient, tem);
4503           emit_jump_insn (gen_jump (label5));
4504           emit_barrier ();
4505           emit_label (label1);
4506           expand_inc (adjusted_op0, const1_rtx);
4507           emit_jump_insn (gen_jump (label4));
4508           emit_barrier ();
4509           emit_label (label2);
4510           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4511           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4512                               quotient, 0, OPTAB_LIB_WIDEN);
4513           if (tem != quotient)
4514             emit_move_insn (quotient, tem);
4515           emit_jump_insn (gen_jump (label5));
4516           emit_barrier ();
4517           emit_label (label3);
4518           expand_dec (adjusted_op0, const1_rtx);
4519           emit_label (label4);
4520           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4521                               quotient, 0, OPTAB_LIB_WIDEN);
4522           if (tem != quotient)
4523             emit_move_insn (quotient, tem);
4524           expand_dec (quotient, const1_rtx);
4525           emit_label (label5);
4526         }
4527         break;
4528
4529       case CEIL_DIV_EXPR:
4530       case CEIL_MOD_EXPR:
4531         if (unsignedp)
4532           {
4533             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4534               {
4535                 rtx t1, t2, t3;
4536                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4537                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4538                                    floor_log2 (d), tquotient, 1);
4539                 t2 = expand_binop (compute_mode, and_optab, op0,
4540                                    gen_int_mode (d - 1, compute_mode),
4541                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4542                 t3 = gen_reg_rtx (compute_mode);
4543                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4544                                       compute_mode, 1, 1);
4545                 if (t3 == 0)
4546                   {
4547                     rtx_code_label *lab;
4548                     lab = gen_label_rtx ();
4549                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4550                     expand_inc (t1, const1_rtx);
4551                     emit_label (lab);
4552                     quotient = t1;
4553                   }
4554                 else
4555                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4556                                                           t1, t3),
4557                                             tquotient);
4558                 break;
4559               }
4560
4561             /* Try using an instruction that produces both the quotient and
4562                remainder, using truncation.  We can easily compensate the
4563                quotient or remainder to get ceiling rounding, once we have the
4564                remainder.  Notice that we compute also the final remainder
4565                value here, and return the result right away.  */
4566             if (target == 0 || GET_MODE (target) != compute_mode)
4567               target = gen_reg_rtx (compute_mode);
4568
4569             if (rem_flag)
4570               {
4571                 remainder = (REG_P (target)
4572                              ? target : gen_reg_rtx (compute_mode));
4573                 quotient = gen_reg_rtx (compute_mode);
4574               }
4575             else
4576               {
4577                 quotient = (REG_P (target)
4578                             ? target : gen_reg_rtx (compute_mode));
4579                 remainder = gen_reg_rtx (compute_mode);
4580               }
4581
4582             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4583                                      remainder, 1))
4584               {
4585                 /* This could be computed with a branch-less sequence.
4586                    Save that for later.  */
4587                 rtx_code_label *label = gen_label_rtx ();
4588                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4589                                  compute_mode, label);
4590                 expand_inc (quotient, const1_rtx);
4591                 expand_dec (remainder, op1);
4592                 emit_label (label);
4593                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4594               }
4595
4596             /* No luck with division elimination or divmod.  Have to do it
4597                by conditionally adjusting op0 *and* the result.  */
4598             {
4599               rtx_code_label *label1, *label2;
4600               rtx adjusted_op0, tem;
4601
4602               quotient = gen_reg_rtx (compute_mode);
4603               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4604               label1 = gen_label_rtx ();
4605               label2 = gen_label_rtx ();
4606               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4607                                compute_mode, label1);
4608               emit_move_insn  (quotient, const0_rtx);
4609               emit_jump_insn (gen_jump (label2));
4610               emit_barrier ();
4611               emit_label (label1);
4612               expand_dec (adjusted_op0, const1_rtx);
4613               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4614                                   quotient, 1, OPTAB_LIB_WIDEN);
4615               if (tem != quotient)
4616                 emit_move_insn (quotient, tem);
4617               expand_inc (quotient, const1_rtx);
4618               emit_label (label2);
4619             }
4620           }
4621         else /* signed */
4622           {
4623             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4624                 && INTVAL (op1) >= 0)
4625               {
4626                 /* This is extremely similar to the code for the unsigned case
4627                    above.  For 2.7 we should merge these variants, but for
4628                    2.6.1 I don't want to touch the code for unsigned since that
4629                    get used in C.  The signed case will only be used by other
4630                    languages (Ada).  */
4631
4632                 rtx t1, t2, t3;
4633                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4634                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4635                                    floor_log2 (d), tquotient, 0);
4636                 t2 = expand_binop (compute_mode, and_optab, op0,
4637                                    gen_int_mode (d - 1, compute_mode),
4638                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4639                 t3 = gen_reg_rtx (compute_mode);
4640                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4641                                       compute_mode, 1, 1);
4642                 if (t3 == 0)
4643                   {
4644                     rtx_code_label *lab;
4645                     lab = gen_label_rtx ();
4646                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4647                     expand_inc (t1, const1_rtx);
4648                     emit_label (lab);
4649                     quotient = t1;
4650                   }
4651                 else
4652                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4653                                                           t1, t3),
4654                                             tquotient);
4655                 break;
4656               }
4657
4658             /* Try using an instruction that produces both the quotient and
4659                remainder, using truncation.  We can easily compensate the
4660                quotient or remainder to get ceiling rounding, once we have the
4661                remainder.  Notice that we compute also the final remainder
4662                value here, and return the result right away.  */
4663             if (target == 0 || GET_MODE (target) != compute_mode)
4664               target = gen_reg_rtx (compute_mode);
4665             if (rem_flag)
4666               {
4667                 remainder= (REG_P (target)
4668                             ? target : gen_reg_rtx (compute_mode));
4669                 quotient = gen_reg_rtx (compute_mode);
4670               }
4671             else
4672               {
4673                 quotient = (REG_P (target)
4674                             ? target : gen_reg_rtx (compute_mode));
4675                 remainder = gen_reg_rtx (compute_mode);
4676               }
4677
4678             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4679                                      remainder, 0))
4680               {
4681                 /* This could be computed with a branch-less sequence.
4682                    Save that for later.  */
4683                 rtx tem;
4684                 rtx_code_label *label = gen_label_rtx ();
4685                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4686                                  compute_mode, label);
4687                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4688                                     NULL_RTX, 0, OPTAB_WIDEN);
4689                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4690                 expand_inc (quotient, const1_rtx);
4691                 expand_dec (remainder, op1);
4692                 emit_label (label);
4693                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4694               }
4695
4696             /* No luck with division elimination or divmod.  Have to do it
4697                by conditionally adjusting op0 *and* the result.  */
4698             {
4699               rtx_code_label *label1, *label2, *label3, *label4, *label5;
4700               rtx adjusted_op0;
4701               rtx tem;
4702
4703               quotient = gen_reg_rtx (compute_mode);
4704               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4705               label1 = gen_label_rtx ();
4706               label2 = gen_label_rtx ();
4707               label3 = gen_label_rtx ();
4708               label4 = gen_label_rtx ();
4709               label5 = gen_label_rtx ();
4710               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4711               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4712                                compute_mode, label1);
4713               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4714                                   quotient, 0, OPTAB_LIB_WIDEN);
4715               if (tem != quotient)
4716                 emit_move_insn (quotient, tem);
4717               emit_jump_insn (gen_jump (label5));
4718               emit_barrier ();
4719               emit_label (label1);
4720               expand_dec (adjusted_op0, const1_rtx);
4721               emit_jump_insn (gen_jump (label4));
4722               emit_barrier ();
4723               emit_label (label2);
4724               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4725                                compute_mode, label3);
4726               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4727                                   quotient, 0, OPTAB_LIB_WIDEN);
4728               if (tem != quotient)
4729                 emit_move_insn (quotient, tem);
4730               emit_jump_insn (gen_jump (label5));
4731               emit_barrier ();
4732               emit_label (label3);
4733               expand_inc (adjusted_op0, const1_rtx);
4734               emit_label (label4);
4735               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4736                                   quotient, 0, OPTAB_LIB_WIDEN);
4737               if (tem != quotient)
4738                 emit_move_insn (quotient, tem);
4739               expand_inc (quotient, const1_rtx);
4740               emit_label (label5);
4741             }
4742           }
4743         break;
4744
4745       case EXACT_DIV_EXPR:
4746         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4747           {
4748             HOST_WIDE_INT d = INTVAL (op1);
4749             unsigned HOST_WIDE_INT ml;
4750             int pre_shift;
4751             rtx t1;
4752
4753             pre_shift = floor_log2 (d & -d);
4754             ml = invert_mod2n (d >> pre_shift, size);
4755             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4756                                pre_shift, NULL_RTX, unsignedp);
4757             quotient = expand_mult (compute_mode, t1,
4758                                     gen_int_mode (ml, compute_mode),
4759                                     NULL_RTX, 1);
4760
4761             insn = get_last_insn ();
4762             set_dst_reg_note (insn, REG_EQUAL,
4763                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4764                                               compute_mode, op0, op1),
4765                               quotient);
4766           }
4767         break;
4768
4769       case ROUND_DIV_EXPR:
4770       case ROUND_MOD_EXPR:
4771         if (unsignedp)
4772           {
4773             rtx tem;
4774             rtx_code_label *label;
4775             label = gen_label_rtx ();
4776             quotient = gen_reg_rtx (compute_mode);
4777             remainder = gen_reg_rtx (compute_mode);
4778             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4779               {
4780                 rtx tem;
4781                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4782                                          quotient, 1, OPTAB_LIB_WIDEN);
4783                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4784                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4785                                           remainder, 1, OPTAB_LIB_WIDEN);
4786               }
4787             tem = plus_constant (compute_mode, op1, -1);
4788             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4789             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4790             expand_inc (quotient, const1_rtx);
4791             expand_dec (remainder, op1);
4792             emit_label (label);
4793           }
4794         else
4795           {
4796             rtx abs_rem, abs_op1, tem, mask;
4797             rtx_code_label *label;
4798             label = gen_label_rtx ();
4799             quotient = gen_reg_rtx (compute_mode);
4800             remainder = gen_reg_rtx (compute_mode);
4801             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4802               {
4803                 rtx tem;
4804                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4805                                          quotient, 0, OPTAB_LIB_WIDEN);
4806                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4807                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4808                                           remainder, 0, OPTAB_LIB_WIDEN);
4809               }
4810             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4811             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4812             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4813                                 1, NULL_RTX, 1);
4814             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4815             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4816                                 NULL_RTX, 0, OPTAB_WIDEN);
4817             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4818                                  size - 1, NULL_RTX, 0);
4819             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4820                                 NULL_RTX, 0, OPTAB_WIDEN);
4821             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4822                                 NULL_RTX, 0, OPTAB_WIDEN);
4823             expand_inc (quotient, tem);
4824             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4825                                 NULL_RTX, 0, OPTAB_WIDEN);
4826             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4827                                 NULL_RTX, 0, OPTAB_WIDEN);
4828             expand_dec (remainder, tem);
4829             emit_label (label);
4830           }
4831         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4832
4833       default:
4834         gcc_unreachable ();
4835       }
4836
4837   if (quotient == 0)
4838     {
4839       if (target && GET_MODE (target) != compute_mode)
4840         target = 0;
4841
4842       if (rem_flag)
4843         {
4844           /* Try to produce the remainder without producing the quotient.
4845              If we seem to have a divmod pattern that does not require widening,
4846              don't try widening here.  We should really have a WIDEN argument
4847              to expand_twoval_binop, since what we'd really like to do here is
4848              1) try a mod insn in compute_mode
4849              2) try a divmod insn in compute_mode
4850              3) try a div insn in compute_mode and multiply-subtract to get
4851                 remainder
4852              4) try the same things with widening allowed.  */
4853           remainder
4854             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4855                                  op0, op1, target,
4856                                  unsignedp,
4857                                  ((optab_handler (optab2, compute_mode)
4858                                    != CODE_FOR_nothing)
4859                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4860           if (remainder == 0)
4861             {
4862               /* No luck there.  Can we do remainder and divide at once
4863                  without a library call?  */
4864               remainder = gen_reg_rtx (compute_mode);
4865               if (! expand_twoval_binop ((unsignedp
4866                                           ? udivmod_optab
4867                                           : sdivmod_optab),
4868                                          op0, op1,
4869                                          NULL_RTX, remainder, unsignedp))
4870                 remainder = 0;
4871             }
4872
4873           if (remainder)
4874             return gen_lowpart (mode, remainder);
4875         }
4876
4877       /* Produce the quotient.  Try a quotient insn, but not a library call.
4878          If we have a divmod in this mode, use it in preference to widening
4879          the div (for this test we assume it will not fail). Note that optab2
4880          is set to the one of the two optabs that the call below will use.  */
4881       quotient
4882         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4883                              op0, op1, rem_flag ? NULL_RTX : target,
4884                              unsignedp,
4885                              ((optab_handler (optab2, compute_mode)
4886                                != CODE_FOR_nothing)
4887                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4888
4889       if (quotient == 0)
4890         {
4891           /* No luck there.  Try a quotient-and-remainder insn,
4892              keeping the quotient alone.  */
4893           quotient = gen_reg_rtx (compute_mode);
4894           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4895                                      op0, op1,
4896                                      quotient, NULL_RTX, unsignedp))
4897             {
4898               quotient = 0;
4899               if (! rem_flag)
4900                 /* Still no luck.  If we are not computing the remainder,
4901                    use a library call for the quotient.  */
4902                 quotient = sign_expand_binop (compute_mode,
4903                                               udiv_optab, sdiv_optab,
4904                                               op0, op1, target,
4905                                               unsignedp, OPTAB_LIB_WIDEN);
4906             }
4907         }
4908     }
4909
4910   if (rem_flag)
4911     {
4912       if (target && GET_MODE (target) != compute_mode)
4913         target = 0;
4914
4915       if (quotient == 0)
4916         {
4917           /* No divide instruction either.  Use library for remainder.  */
4918           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4919                                          op0, op1, target,
4920                                          unsignedp, OPTAB_LIB_WIDEN);
4921           /* No remainder function.  Try a quotient-and-remainder
4922              function, keeping the remainder.  */
4923           if (!remainder)
4924             {
4925               remainder = gen_reg_rtx (compute_mode);
4926               if (!expand_twoval_binop_libfunc
4927                   (unsignedp ? udivmod_optab : sdivmod_optab,
4928                    op0, op1,
4929                    NULL_RTX, remainder,
4930                    unsignedp ? UMOD : MOD))
4931                 remainder = NULL_RTX;
4932             }
4933         }
4934       else
4935         {
4936           /* We divided.  Now finish doing X - Y * (X / Y).  */
4937           remainder = expand_mult (compute_mode, quotient, op1,
4938                                    NULL_RTX, unsignedp);
4939           remainder = expand_binop (compute_mode, sub_optab, op0,
4940                                     remainder, target, unsignedp,
4941                                     OPTAB_LIB_WIDEN);
4942         }
4943     }
4944
4945   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4946 }
4947 \f
4948 /* Build and return a tree of data type TYPE that describes the value of
4949    the rtx X.
4950
4951    Integer, real and vector constants become the matching constant
4952    nodes.  PLUS, MINUS, NEG, MULT, ASHIFT, LSHIFTRT, ASHIFTRT, DIV, UDIV,
4953    SIGN_EXTEND, ZERO_EXTEND and CONST are rebuilt recursively from their
4954    operands.  A SYMBOL_REF that has a decl becomes the address of that
4955    decl.  Any other X is wrapped in a new, nameless VAR_DECL whose RTL
4956    is X.  */
4957
4958 tree
4959 make_tree (tree type, rtx x)
4960 {
4961   /* Type in which operands are built when it may differ from TYPE.  */
4962   tree op_type;
4963   tree decl;
4964   enum tree_code tcode;
4965
4966   switch (GET_CODE (x))
4967     {
4968     case CONST_INT:
4969     case CONST_WIDE_INT:
4970       return wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
4971
4972     case CONST_DOUBLE:
4973       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
4974       if (TARGET_SUPPORTS_WIDE_INT != 0 || GET_MODE (x) != VOIDmode)
4975         {
4976           /* Only a VOIDmode X with TARGET_SUPPORTS_WIDE_INT == 0 is an
4977              integer; everything else is read as a real value.  */
4978           REAL_VALUE_TYPE rval;
4979
4980           REAL_VALUE_FROM_CONST_DOUBLE (rval, x);
4981           return build_real (type, rval);
4982         }
4983
4984       /* Here X is an integer made of two HOST_WIDE_INTs.  */
4985       return wide_int_to_tree
4986         (type, wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
4987                                      HOST_BITS_PER_WIDE_INT * 2));
4988
4989     case CONST_VECTOR:
4990       {
4991         /* Convert each element to TREE_TYPE (TYPE), last element first,
4992            then assemble the vector constant.  */
4993         int count = CONST_VECTOR_NUNITS (x);
4994         tree elt_type = TREE_TYPE (type);
4995         tree *nodes = XALLOCAVEC (tree, count);
4996         int idx = count;
4997
4998         while (--idx >= 0)
4999           nodes[idx] = make_tree (elt_type, CONST_VECTOR_ELT (x, idx));
5000
5001         return build_vector (type, nodes);
5002       }
5003
5004     case PLUS:
5005     case MINUS:
5006     case MULT:
5007     case ASHIFT:
5008       /* Two-operand codes whose operands are both built in TYPE.  */
5009       tcode = (GET_CODE (x) == PLUS ? PLUS_EXPR
5010                : GET_CODE (x) == MINUS ? MINUS_EXPR
5011                : GET_CODE (x) == MULT ? MULT_EXPR : LSHIFT_EXPR);
5012       return fold_build2 (tcode, type, make_tree (type, XEXP (x, 0)),
5013                           make_tree (type, XEXP (x, 1)));
5014
5015     case NEG:
5016       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5017
5018     case LSHIFTRT:
5019     case ASHIFTRT:
5020       /* A logical shift is done in the unsigned variant of TYPE and an
5021          arithmetic one in the signed variant; the count is built in TYPE.  */
5022       if (GET_CODE (x) == LSHIFTRT)
5023         op_type = unsigned_type_for (type);
5024       else
5025         op_type = signed_type_for (type);
5026
5027       return fold_convert (type, build2 (RSHIFT_EXPR, op_type,
5028                                          make_tree (op_type, XEXP (x, 0)),
5029                                          make_tree (type, XEXP (x, 1))));
5030
5031     case DIV:
5032     case UDIV:
5033       /* UDIV divides in the unsigned variant of TYPE.  DIV divides in the
5034          signed variant, except that a REAL_TYPE is used unchanged.  */
5035       if (GET_CODE (x) == UDIV)
5036         op_type = unsigned_type_for (type);
5037       else if (TREE_CODE (type) == REAL_TYPE)
5038         op_type = type;
5039       else
5040         op_type = signed_type_for (type);
5041
5042       return fold_convert (type, build2 (TRUNC_DIV_EXPR, op_type,
5043                                          make_tree (op_type, XEXP (x, 0)),
5044                                          make_tree (op_type, XEXP (x, 1))));
5045
5046     case SIGN_EXTEND:
5047     case ZERO_EXTEND:
5048       /* Build the operand in the type the language provides for its own
5049          mode (unsigned for ZERO_EXTEND), then convert to TYPE.  */
5050       op_type = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5051                                                 GET_CODE (x) == ZERO_EXTEND);
5052       return fold_convert (type, make_tree (op_type, XEXP (x, 0)));
5053
5054     case CONST:
5055       return make_tree (type, XEXP (x, 0));
5056
5057     case SYMBOL_REF:
5058       decl = SYMBOL_REF_DECL (x);
5059       if (decl != NULL_TREE)
5060         return fold_convert (type, build_fold_addr_expr (decl));
5061       /* No decl is attached: fall through.  */
5062
5063     default:
5064       decl = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5065       /* For a POINTER_TYPE, X might have to be converted from address
5066          mode to pointer mode.  */
5067       if (POINTER_TYPE_P (type))
5068         x = convert_memory_address_addr_space
5069               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5070       /* SET_DECL_RTL is deliberately avoided: set_decl_rtl must not go
5071          adjusting REG_ATTRS for this temporary.  */
5072       decl->decl_with_rtl.rtl = x;
5073       return decl;
5074     }
5075 }
5076 \f
5077 /* Emit RTL for the bitwise AND of OP0 and OP1 in MODE.  When TARGET is
5078    nonzero the result ends up in TARGET, which is returned; when TARGET
5079    is 0 the value produced (by constant folding if both operands are
5080    VOIDmode and folding succeeds, else by expand_binop) is returned.  */
5081
5082 rtx
5083 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5084 {
5085   /* Folding is only attempted when neither operand carries a mode.  */
5086   bool modeless = GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode;
5087   rtx result
5088     = modeless ? simplify_binary_operation (AND, mode, op0, op1) : NULL_RTX;
5089   if (!result)
5090     result = expand_binop (mode, and_optab, op0, op1, target, 0,
5091                            OPTAB_LIB_WIDEN);
5092   if (!target)
5093     return result;
5094   if (result != target)
5095     emit_move_insn (target, result);
5096   return target;
5097 }
5098
5099 /* Helper function for emit_store_flag.  Emit the store-flag insn ICODE
5100    for the comparison X CODE Y and return an rtx for its result, or
5101    NULL_RTX, with everything emitted here deleted again, when an operand
5102    cannot be prepared or the insn cannot be expanded.
5103    X and Y go through prepare_operand with MODE, COMPARE_MODE and UNSIGNEDP.
5104    The insn produces its value in targetm.cstore_mode (ICODE); TARGET_MODE
5105    is the mode wanted for the result, VOIDmode selecting the insn's own.
5106    TARGET suggests where to put the result and may be zero.  NORMALIZEP 0
5107    keeps the value as produced; 1 or -1 rewrites a STORE_FLAG_VALUE result
5108    into that value.  */
5109 static rtx
5110 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5111              machine_mode mode, machine_mode compare_mode,
5112              int unsignedp, rtx x, rtx y, int normalizep,
5113              machine_mode target_mode)
5114 {
5115   machine_mode result_mode = targetm.cstore_mode (icode);
5116   rtx_insn *start = get_last_insn ();
5117   struct expand_operand operands[4];
5118   rtx cmp, raw, scratch, value;
5119
5120   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5121   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5122   if (x == NULL_RTX || y == NULL_RTX)
5123     goto fail;
5124
5125   if (target_mode == VOIDmode)
5126     target_mode = result_mode;
5127   if (target == NULL_RTX)
5128     target = gen_reg_rtx (target_mode);
5129
5130   /* Operand 0 is the output, operand 1 the comparison itself, and
5131      operands 2 and 3 are the values being compared.  */
5132   cmp = gen_rtx_fmt_ee (code, result_mode, x, y);
5133   create_output_operand (&operands[0], optimize ? NULL_RTX : target,
5134                          result_mode);
5135   create_fixed_operand (&operands[1], cmp);
5136   create_fixed_operand (&operands[2], x);
5137   create_fixed_operand (&operands[3], y);
5138   if (!maybe_expand_insn (icode, 4, operands))
5139     goto fail;
5140   raw = operands[0].value;
5141
5142   /* A result that must become wider is extended to TARGET_MODE first and
5143      normalized afterwards; that gives better combining opportunities on
5144      machines with a SIGN_EXTRACT when one bit is tested (mostly the 68k).
5145      The extension is unsigned whenever STORE_FLAG_VALUE has its sign bit
5146      clear in the insn's mode, which is usually more efficient.  */
5147   if (GET_MODE_SIZE (target_mode) <= GET_MODE_SIZE (result_mode))
5148     value = raw;
5149   else
5150     {
5151       int zero_extend
5152         = val_signbit_known_clear_p (result_mode, STORE_FLAG_VALUE);
5153
5154       convert_move (target, raw, zero_extend);
5155       value = target;
5156       result_mode = target_mode;
5157     }
5158
5159   /* When optimizing, the steps below get no suggested destination instead
5160      of reusing the insn's output, so that subexpressions are kept around.  */
5161   scratch = optimize ? NULL_RTX : raw;
5162
5163   /* Normalize to the requested value; nothing is needed when the caller
5164      takes any value or STORE_FLAG_VALUE is already the one asked for.  */
5165   if (normalizep != 0 && normalizep != STORE_FLAG_VALUE)
5166     {
5167       /* STORE_FLAG_VALUE might be the most negative number, so negate
5168          NORMALIZEP instead to avoid a compile-time warning.  */
5169       if (- normalizep == STORE_FLAG_VALUE)
5170         value = expand_unop (result_mode, neg_optab, value, scratch, 0);
5171       /* Testing STORE_FLAG_VALUE < 0 would make a sign-bit-only value
5172          awkward because of ANSI integer constant typing rules.  */
5173       else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5174         value = expand_shift (RSHIFT_EXPR, result_mode, value,
5175                               GET_MODE_BITSIZE (result_mode) - 1, scratch,
5176                               normalizep == 1);
5177       else
5178         {
5179           gcc_assert (STORE_FLAG_VALUE & 1);
5180           value = expand_and (result_mode, value, const1_rtx, scratch);
5181           if (normalizep == -1)
5182             value = expand_unop (result_mode, neg_optab, value, value, 0);
5183         }
5184     }
5185
5186   /* A result still in a mode other than TARGET_MODE is moved to TARGET.  */
5187   if (target_mode == result_mode)
5188     return value;
5189   convert_move (target, value, 0);
5190   return target;
5191
5192  fail:
5193   delete_insns_since (start);
5194   return NULL_RTX;
5195 }
5196
5197
5198 /* A subroutine of emit_store_flag only including "tricks" that do not
5199    need a recursive call.  These are kept separate to avoid infinite
5200    loops.  */
5201
5202 static rtx
5203 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5204                    machine_mode mode, int unsignedp, int normalizep,
5205                    machine_mode target_mode)
5206 {
5207   rtx subtarget;
5208   enum insn_code icode;
5209   machine_mode compare_mode;
5210   enum mode_class mclass;
5211   enum rtx_code scode;
5212   rtx tem;
5213
5214   if (unsignedp)
5215     code = unsigned_condition (code);
5216   scode = swap_condition (code);
5217
5218   /* If one operand is constant, make it the second one.  Only do this
5219      if the other operand is not constant as well.  */
5220
5221   if (swap_commutative_operands_p (op0, op1))
5222     {
5223       tem = op0;
5224       op0 = op1;
5225       op1 = tem;
5226       code = swap_condition (code);
5227     }
5228
5229   if (mode == VOIDmode)
5230     mode = GET_MODE (op0);
5231
5232   /* For some comparisons with 1 and -1, we can convert this to
5233      comparisons with zero.  This will often produce more opportunities for
5234      store-flag insns.  */
5235
5236   switch (code)
5237     {
5238     case LT:
5239       if (op1 == const1_rtx)
5240         op1 = const0_rtx, code = LE;
5241       break;
5242     case LE:
5243       if (op1 == constm1_rtx)
5244         op1 = const0_rtx, code = LT;
5245       break;
5246     case GE:
5247       if (op1 == const1_rtx)
5248         op1 = const0_rtx, code = GT;
5249       break;
5250     case GT:
5251       if (op1 == constm1_rtx)
5252         op1 = const0_rtx, code = GE;
5253       break;
5254     case GEU:
5255       if (op1 == const1_rtx)
5256         op1 = const0_rtx, code = NE;
5257       break;
5258     case LTU:
5259       if (op1 == const1_rtx)
5260         op1 = const0_rtx, code = EQ;
5261       break;
5262     default:
5263       break;
5264     }
5265
5266   /* If we are comparing a double-word integer with zero or -1, we can
5267      convert the comparison into one involving a single word.  */
5268   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5269       && GET_MODE_CLASS (mode) == MODE_INT
5270       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5271     {
5272       if ((code == EQ || code == NE)
5273           && (op1 == const0_rtx || op1 == constm1_rtx))
5274         {
5275           rtx op00, op01;
5276
5277           /* Do a logical OR or AND of the two words and compare the
5278              result.  */
5279           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5280           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5281           tem = expand_binop (word_mode,
5282                               op1 == const0_rtx ? ior_optab : and_optab,
5283                               op00, op01, NULL_RTX, unsignedp,
5284                               OPTAB_DIRECT);
5285
5286           if (tem != 0)
5287             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5288                                    unsignedp, normalizep);
5289         }
5290       else if ((code == LT || code == GE) && op1 == const0_rtx)
5291         {
5292           rtx op0h;
5293
5294           /* If testing the sign bit, can just test on high word.  */
5295           op0h = simplify_gen_subreg (word_mode, op0, mode,
5296                                       subreg_highpart_offset (word_mode,
5297                                                               mode));
5298           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5299                                  unsignedp, normalizep);
5300         }
5301       else
5302         tem = NULL_RTX;
5303
5304       if (tem)
5305         {
5306           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5307             return tem;
5308           if (!target)
5309             target = gen_reg_rtx (target_mode);
5310
5311           convert_move (target, tem,
5312                         !val_signbit_known_set_p (word_mode,
5313                                                   (normalizep ? normalizep
5314                                                    : STORE_FLAG_VALUE)));
5315           return target;
5316         }
5317     }
5318
5319   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5320      complement of A (for GE) and shifting the sign bit to the low bit.  */
5321   if (op1 == const0_rtx && (code == LT || code == GE)
5322       && GET_MODE_CLASS (mode) == MODE_INT
5323       && (normalizep || STORE_FLAG_VALUE == 1
5324           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5325     {
5326       subtarget = target;
5327
5328       if (!target)
5329         target_mode = mode;
5330
5331       /* If the result is to be wider than OP0, it is best to convert it
5332          first.  If it is to be narrower, it is *incorrect* to convert it
5333          first.  */
5334       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5335         {
5336           op0 = convert_modes (target_mode, mode, op0, 0);
5337           mode = target_mode;
5338         }
5339
5340       if (target_mode != mode)
5341         subtarget = 0;
5342
5343       if (code == GE)
5344         op0 = expand_unop (mode, one_cmpl_optab, op0,
5345                            ((STORE_FLAG_VALUE == 1 || normalizep)
5346                             ? 0 : subtarget), 0);
5347
5348       if (STORE_FLAG_VALUE == 1 || normalizep)
5349         /* If we are supposed to produce a 0/1 value, we want to do
5350            a logical shift from the sign bit to the low-order bit; for
5351            a -1/0 value, we do an arithmetic shift.  */
5352         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5353                             GET_MODE_BITSIZE (mode) - 1,
5354                             subtarget, normalizep != -1);
5355
5356       if (mode != target_mode)
5357         op0 = convert_modes (target_mode, mode, op0, 0);
5358
5359       return op0;
5360     }
5361
5362   mclass = GET_MODE_CLASS (mode);
5363   for (compare_mode = mode; compare_mode != VOIDmode;
5364        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5365     {
5366      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5367      icode = optab_handler (cstore_optab, optab_mode);
5368      if (icode != CODE_FOR_nothing)
5369         {
5370           do_pending_stack_adjust ();
5371           tem = emit_cstore (target, icode, code, mode, compare_mode,
5372                              unsignedp, op0, op1, normalizep, target_mode);
5373           if (tem)
5374             return tem;
5375
5376           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5377             {
5378               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5379                                  unsignedp, op1, op0, normalizep, target_mode);
5380               if (tem)
5381                 return tem;
5382             }
5383           break;
5384         }
5385     }
5386
5387   return 0;
5388 }
5389
5390 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5391    and storing in TARGET.  Normally return TARGET.
5392    Return 0 if that cannot be done.
5393
5394    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5395    it is VOIDmode, they cannot both be CONST_INT.
5396
5397    UNSIGNEDP is for the case where we have to widen the operands
5398    to perform the operation.  It says to use zero-extension.
5399
5400    NORMALIZEP is 1 if we should convert the result to be either zero
5401    or one.  Normalize is -1 if we should convert the result to be
5402    either zero or -1.  If NORMALIZEP is zero, the result will be left
5403    "raw" out of the scc insn.  */
5404
5405 rtx
5406 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5407                  machine_mode mode, int unsignedp, int normalizep)
5408 {
5409   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5410   enum rtx_code rcode;
5411   rtx subtarget;
5412   rtx tem, trueval;
5413   rtx_insn *last;
5414
5415   /* If we compare constants, we shouldn't use a store-flag operation,
5416      but a constant load.  We can get there via the vanilla route that
5417      usually generates a compare-branch sequence, but will in this case
5418      fold the comparison to a constant, and thus elide the branch.  */
5419   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5420     return NULL_RTX;
5421
5422   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5423                            target_mode);
5424   if (tem)
5425     return tem;
5426
5427   /* If we reached here, we can't do this with a scc insn, however there
5428      are some comparisons that can be done in other ways.  Don't do any
5429      of these cases if branches are very cheap.  */
5430   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5431     return 0;
5432
5433   /* See what we need to return.  We can only return a 1, -1, or the
5434      sign bit.  */
5435
5436   if (normalizep == 0)
5437     {
5438       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5439         normalizep = STORE_FLAG_VALUE;
5440
5441       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5442         ;
5443       else
5444         return 0;
5445     }
5446
5447   last = get_last_insn ();
5448
5449   /* If optimizing, use different pseudo registers for each insn, instead
5450      of reusing the same pseudo.  This leads to better CSE, but slows
5451      down the compiler, since there are more pseudos */
5452   subtarget = (!optimize
5453                && (target_mode == mode)) ? target : NULL_RTX;
5454   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5455
5456   /* For floating-point comparisons, try the reverse comparison or try
5457      changing the "orderedness" of the comparison.  */
5458   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5459     {
5460       enum rtx_code first_code;
5461       bool and_them;
5462
5463       rcode = reverse_condition_maybe_unordered (code);
5464       if (can_compare_p (rcode, mode, ccp_store_flag)
5465           && (code == ORDERED || code == UNORDERED
5466               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5467               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5468         {
5469           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5470                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5471
5472           /* For the reverse comparison, use either an addition or a XOR.  */
5473           if (want_add
5474               && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5475                            optimize_insn_for_speed_p ()) == 0)
5476             {
5477               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5478                                        STORE_FLAG_VALUE, target_mode);
5479               if (tem)
5480                 return expand_binop (target_mode, add_optab, tem,
5481                                      gen_int_mode (normalizep, target_mode),
5482                                      target, 0, OPTAB_WIDEN);
5483             }
5484           else if (!want_add
5485                    && rtx_cost (trueval, XOR, 1,
5486                                 optimize_insn_for_speed_p ()) == 0)
5487             {
5488               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5489                                        normalizep, target_mode);
5490               if (tem)
5491                 return expand_binop (target_mode, xor_optab, tem, trueval,
5492                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5493             }
5494         }
5495
5496       delete_insns_since (last);
5497
5498       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5499       if (code == ORDERED || code == UNORDERED)
5500         return 0;
5501
5502       and_them = split_comparison (code, mode, &first_code, &code);
5503
5504       /* If there are no NaNs, the first comparison should always fall through.
5505          Effectively change the comparison to the other one.  */
5506       if (!HONOR_NANS (mode))
5507         {
5508           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5509           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5510                                     target_mode);
5511         }
5512
5513 #ifdef HAVE_conditional_move
5514       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5515          conditional move.  */
5516       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5517                                normalizep, target_mode);
5518       if (tem == 0)
5519         return 0;
5520
5521       if (and_them)
5522         tem = emit_conditional_move (target, code, op0, op1, mode,
5523                                      tem, const0_rtx, GET_MODE (tem), 0);
5524       else
5525         tem = emit_conditional_move (target, code, op0, op1, mode,
5526                                      trueval, tem, GET_MODE (tem), 0);
5527
5528       if (tem == 0)
5529         delete_insns_since (last);
5530       return tem;
5531 #else
5532       return 0;
5533 #endif
5534     }
5535
5536   /* The remaining tricks only apply to integer comparisons.  */
5537
5538   if (GET_MODE_CLASS (mode) != MODE_INT)
5539     return 0;
5540
5541   /* If this is an equality comparison of integers, we can try to exclusive-or
5542      (or subtract) the two operands and use a recursive call to try the
5543      comparison with zero.  Don't do any of these cases if branches are
5544      very cheap.  */
5545
5546   if ((code == EQ || code == NE) && op1 != const0_rtx)
5547     {
5548       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5549                           OPTAB_WIDEN);
5550
5551       if (tem == 0)
5552         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5553                             OPTAB_WIDEN);
5554       if (tem != 0)
5555         tem = emit_store_flag (target, code, tem, const0_rtx,
5556                                mode, unsignedp, normalizep);
5557       if (tem != 0)
5558         return tem;
5559
5560       delete_insns_since (last);
5561     }
5562
5563   /* For integer comparisons, try the reverse comparison.  However, for
5564      small X and if we'd have anyway to extend, implementing "X != 0"
5565      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5566   rcode = reverse_condition (code);
5567   if (can_compare_p (rcode, mode, ccp_store_flag)
5568       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5569             && code == NE
5570             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5571             && op1 == const0_rtx))
5572     {
5573       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5574                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5575
5576       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5577       if (want_add
5578           && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5579                        optimize_insn_for_speed_p ()) == 0)
5580         {
5581           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5582                                    STORE_FLAG_VALUE, target_mode);
5583           if (tem != 0)
5584             tem = expand_binop (target_mode, add_optab, tem,
5585                                 gen_int_mode (normalizep, target_mode),
5586                                 target, 0, OPTAB_WIDEN);
5587         }
5588       else if (!want_add
5589                && rtx_cost (trueval, XOR, 1,
5590                             optimize_insn_for_speed_p ()) == 0)
5591         {
5592           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5593                                    normalizep, target_mode);
5594           if (tem != 0)
5595             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5596                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5597         }
5598
5599       if (tem != 0)
5600         return tem;
5601       delete_insns_since (last);
5602     }
5603
5604   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5605      the constant zero.  Reject all other comparisons at this point.  Only
5606      do LE and GT if branches are expensive since they are expensive on
5607      2-operand machines.  */
5608
5609   if (op1 != const0_rtx
5610       || (code != EQ && code != NE
5611           && (BRANCH_COST (optimize_insn_for_speed_p (),
5612                            false) <= 1 || (code != LE && code != GT))))
5613     return 0;
5614
5615   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5616      do the necessary operation below.  */
5617
5618   tem = 0;
5619
5620   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5621      the sign bit set.  */
5622
5623   if (code == LE)
5624     {
5625       /* This is destructive, so SUBTARGET can't be OP0.  */
5626       if (rtx_equal_p (subtarget, op0))
5627         subtarget = 0;
5628
5629       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5630                           OPTAB_WIDEN);
5631       if (tem)
5632         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5633                             OPTAB_WIDEN);
5634     }
5635
5636   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5637      number of bits in the mode of OP0, minus one.  */
5638
5639   if (code == GT)
5640     {
5641       if (rtx_equal_p (subtarget, op0))
5642         subtarget = 0;
5643
5644       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5645                           GET_MODE_BITSIZE (mode) - 1,
5646                           subtarget, 0);
5647       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5648                           OPTAB_WIDEN);
5649     }
5650
5651   if (code == EQ || code == NE)
5652     {
5653       /* For EQ or NE, one way to do the comparison is to apply an operation
5654          that converts the operand into a positive number if it is nonzero
5655          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5656          for NE we negate.  This puts the result in the sign bit.  Then we
5657          normalize with a shift, if needed.
5658
5659          Two operations that can do the above actions are ABS and FFS, so try
5660          them.  If that doesn't work, and MODE is smaller than a full word,
5661          we can use zero-extension to the wider mode (an unsigned conversion)
5662          as the operation.  */
5663
5664       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5665          that is compensated by the subsequent overflow when subtracting
5666          one / negating.  */
5667
5668       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5669         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5670       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5671         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5672       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5673         {
5674           tem = convert_modes (word_mode, mode, op0, 1);
5675           mode = word_mode;
5676         }
5677
5678       if (tem != 0)
5679         {
5680           if (code == EQ)
5681             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5682                                 0, OPTAB_WIDEN);
5683           else
5684             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5685         }
5686
5687       /* If we couldn't do it that way, for NE we can "or" the two's complement
5688          of the value with itself.  For EQ, we take the one's complement of
5689          that "or", which is an extra insn, so we only handle EQ if branches
5690          are expensive.  */
5691
5692       if (tem == 0
5693           && (code == NE
5694               || BRANCH_COST (optimize_insn_for_speed_p (),
5695                               false) > 1))
5696         {
5697           if (rtx_equal_p (subtarget, op0))
5698             subtarget = 0;
5699
5700           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5701           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5702                               OPTAB_WIDEN);
5703
5704           if (tem && code == EQ)
5705             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5706         }
5707     }
5708
5709   if (tem && normalizep)
5710     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5711                         GET_MODE_BITSIZE (mode) - 1,
5712                         subtarget, normalizep == 1);
5713
5714   if (tem)
5715     {
5716       if (!target)
5717         ;
5718       else if (GET_MODE (tem) != target_mode)
5719         {
5720           convert_move (target, tem, 0);
5721           tem = target;
5722         }
5723       else if (!subtarget)
5724         {
5725           emit_move_insn (target, tem);
5726           tem = target;
5727         }
5728     }
5729   else
5730     delete_insns_since (last);
5731
5732   return tem;
5733 }
5734
5735 /* Like emit_store_flag, but always succeeds.  */
5736
5737 rtx
5738 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5739                        machine_mode mode, int unsignedp, int normalizep)
5740 {
5741   rtx tem;
5742   rtx_code_label *label;
5743   rtx trueval, falseval;
5744
5745   /* First see if emit_store_flag can do the job.  */
5746   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5747   if (tem != 0)
5748     return tem;
5749
5750   if (!target)
5751     target = gen_reg_rtx (word_mode);
5752
5753   /* If this failed, we have to do this with set/compare/jump/set code.
5754      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5755   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5756   if (code == NE
5757       && GET_MODE_CLASS (mode) == MODE_INT
5758       && REG_P (target)
5759       && op0 == target
5760       && op1 == const0_rtx)
5761     {
5762       label = gen_label_rtx ();
5763       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5764                                mode, NULL_RTX, NULL_RTX, label, -1);
5765       emit_move_insn (target, trueval);
5766       emit_label (label);
5767       return target;
5768     }
5769
5770   if (!REG_P (target)
5771       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5772     target = gen_reg_rtx (GET_MODE (target));
5773
5774   /* Jump in the right direction if the target cannot implement CODE
5775      but can jump on its reverse condition.  */
5776   falseval = const0_rtx;
5777   if (! can_compare_p (code, mode, ccp_jump)
5778       && (! FLOAT_MODE_P (mode)
5779           || code == ORDERED || code == UNORDERED
5780           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5781           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5782     {
5783       enum rtx_code rcode;
5784       if (FLOAT_MODE_P (mode))
5785         rcode = reverse_condition_maybe_unordered (code);
5786       else
5787         rcode = reverse_condition (code);
5788
5789       /* Canonicalize to UNORDERED for the libcall.  */
5790       if (can_compare_p (rcode, mode, ccp_jump)
5791           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5792         {
5793           falseval = trueval;
5794           trueval = const0_rtx;
5795           code = rcode;
5796         }
5797     }
5798
5799   emit_move_insn (target, trueval);
5800   label = gen_label_rtx ();
5801   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5802                            NULL_RTX, label, -1);
5803
5804   emit_move_insn (target, falseval);
5805   emit_label (label);
5806
5807   return target;
5808 }
5809 \f
5810 /* Perform possibly multi-word comparison and conditional jump to LABEL
5811    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5812    now a thin wrapper around do_compare_rtx_and_jump.  */
5813
5814 static void
5815 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5816                  rtx_code_label *label)
5817 {
5818   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5819   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5820                            NULL_RTX, NULL_RTX, label, -1);
5821 }