gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2014 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "diagnostic-core.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "stor-layout.h"
  30 #include "tm_p.h"
  31 #include "flags.h"
  32 #include "insn-config.h"
  33 #include "expr.h"
  34 #include "insn-codes.h"
  35 #include "optabs.h"
  36 #include "recog.h"
  37 #include "langhooks.h"
  38 #include "predict.h"
  39 #include "basic-block.h"
  40 #include "df.h"
  41 #include "target.h"
  42 #include "expmed.h"
  43
  44 struct target_expmed default_target_expmed;
  45 #if SWITCHABLE_TARGET
  46 struct target_expmed *this_target_expmed = &default_target_expmed;
  47 #endif
  48
  49 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  50                                    unsigned HOST_WIDE_INT,
  51                                    unsigned HOST_WIDE_INT,
  52                                    unsigned HOST_WIDE_INT,
  53                                    rtx);
  54 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
  55                                      unsigned HOST_WIDE_INT,
  56                                      rtx);
  57 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  58                                    unsigned HOST_WIDE_INT,
  59                                    unsigned HOST_WIDE_INT,
  60                                    unsigned HOST_WIDE_INT,
  61                                    rtx);
  62 static rtx extract_fixed_bit_field (machine_mode, rtx,
  63                                     unsigned HOST_WIDE_INT,
  64                                     unsigned HOST_WIDE_INT, rtx, int);
  65 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
  66                                       unsigned HOST_WIDE_INT,
  67                                       unsigned HOST_WIDE_INT, rtx, int);
  68 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
  69 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  70                                     unsigned HOST_WIDE_INT, int);
  71 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
  72 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  73 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  74
  75 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  76    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  77    The mask is truncated if necessary to the width of mode MODE.  The
  78    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
  79
  80 static inline rtx
  81 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
  82 {
  83   return immed_wide_int_const
  84     (wi::shifted_mask (bitpos, bitsize, complement,
  85                        GET_MODE_PRECISION (mode)), mode);
  86 }
  87
  88 /* Test whether a value is zero of a power of two.  */
  89 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  90   (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
  91
  92 struct init_expmed_rtl
  93 {
  94   rtx reg;
  95   rtx plus;
  96   rtx neg;
  97   rtx mult;
  98   rtx sdiv;
  99   rtx udiv;
 100   rtx sdiv_32;
 101   rtx smod_32;
 102   rtx wide_mult;
 103   rtx wide_lshr;
 104   rtx wide_trunc;
 105   rtx shift;
 106   rtx shift_mult;
 107   rtx shift_add;
 108   rtx shift_sub0;
 109   rtx shift_sub1;
 110   rtx zext;
 111   rtx trunc;
 112
 113   rtx pow2[MAX_BITS_PER_WORD];
 114   rtx cint[MAX_BITS_PER_WORD];
 115 };
 116
 117 static void
 118 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
 119                       machine_mode from_mode, bool speed)
 120 {
 121   int to_size, from_size;
 122   rtx which;
 123
 124   to_size = GET_MODE_PRECISION (to_mode);
 125   from_size = GET_MODE_PRECISION (from_mode);
 126
 127   /* Most partial integers have a precision less than the "full"
 128      integer it requires for storage.  In case one doesn't, for
 129      comparison purposes here, reduce the bit size by one in that
 130      case.  */
 131   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 132       && exact_log2 (to_size) != -1)
 133     to_size --;
 134   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 135       && exact_log2 (from_size) != -1)
 136     from_size --;
 137
 138   /* Assume cost of zero-extend and sign-extend is the same.  */
 139   which = (to_size < from_size ? all->trunc : all->zext);
 140
 141   PUT_MODE (all->reg, from_mode);
 142   set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
 143 }
 144
 145 static void
 146 init_expmed_one_mode (struct init_expmed_rtl *all,
 147                       machine_mode mode, int speed)
 148 {
 149   int m, n, mode_bitsize;
 150   machine_mode mode_from;
 151
 152   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 153
 154   PUT_MODE (all->reg, mode);
 155   PUT_MODE (all->plus, mode);
 156   PUT_MODE (all->neg, mode);
 157   PUT_MODE (all->mult, mode);
 158   PUT_MODE (all->sdiv, mode);
 159   PUT_MODE (all->udiv, mode);
 160   PUT_MODE (all->sdiv_32, mode);
 161   PUT_MODE (all->smod_32, mode);
 162   PUT_MODE (all->wide_trunc, mode);
 163   PUT_MODE (all->shift, mode);
 164   PUT_MODE (all->shift_mult, mode);
 165   PUT_MODE (all->shift_add, mode);
 166   PUT_MODE (all->shift_sub0, mode);
 167   PUT_MODE (all->shift_sub1, mode);
 168   PUT_MODE (all->zext, mode);
 169   PUT_MODE (all->trunc, mode);
 170
 171   set_add_cost (speed, mode, set_src_cost (all->plus, speed));
 172   set_neg_cost (speed, mode, set_src_cost (all->neg, speed));
 173   set_mul_cost (speed, mode, set_src_cost (all->mult, speed));
 174   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, speed));
 175   set_udiv_cost (speed, mode, set_src_cost (all->udiv, speed));
 176
 177   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, speed)
 178                                      <= 2 * add_cost (speed, mode)));
 179   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, speed)
 180                                      <= 4 * add_cost (speed, mode)));
 181
 182   set_shift_cost (speed, mode, 0, 0);
 183   {
 184     int cost = add_cost (speed, mode);
 185     set_shiftadd_cost (speed, mode, 0, cost);
 186     set_shiftsub0_cost (speed, mode, 0, cost);
 187     set_shiftsub1_cost (speed, mode, 0, cost);
 188   }
 189
 190   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 191   for (m = 1; m < n; m++)
 192     {
 193       XEXP (all->shift, 1) = all->cint[m];
 194       XEXP (all->shift_mult, 1) = all->pow2[m];
 195
 196       set_shift_cost (speed, mode, m, set_src_cost (all->shift, speed));
 197       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, speed));
 198       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, speed));
 199       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, speed));
 200     }
 201
 202   if (SCALAR_INT_MODE_P (mode))
 203     {
 204       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 205            mode_from = (machine_mode)(mode_from + 1))
 206         init_expmed_one_conv (all, mode, mode_from, speed);
 207     }
 208   if (GET_MODE_CLASS (mode) == MODE_INT)
 209     {
 210       machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
 211       if (wider_mode != VOIDmode)
 212         {
 213           PUT_MODE (all->zext, wider_mode);
 214           PUT_MODE (all->wide_mult, wider_mode);
 215           PUT_MODE (all->wide_lshr, wider_mode);
 216           XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
 217
 218           set_mul_widen_cost (speed, wider_mode,
 219                               set_src_cost (all->wide_mult, speed));
 220           set_mul_highpart_cost (speed, mode,
 221                                  set_src_cost (all->wide_trunc, speed));
 222         }
 223     }
 224 }
 225
 226 void
 227 init_expmed (void)
 228 {
 229   struct init_expmed_rtl all;
 230   machine_mode mode = QImode;
 231   int m, speed;
 232
 233   memset (&all, 0, sizeof all);
 234   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 235     {
 236       all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 237       all.cint[m] = GEN_INT (m);
 238     }
 239
 240   /* Avoid using hard regs in ways which may be unsupported.  */
 241   all.reg = gen_rtx_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 242   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 243   all.neg = gen_rtx_NEG (mode, all.reg);
 244   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 245   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 246   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 247   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 248   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 249   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 250   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 251   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 252   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 253   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 254   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 255   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 256   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 257   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 258   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 259
 260   for (speed = 0; speed < 2; speed++)
 261     {
 262       crtl->maybe_hot_insn_p = speed;
 263       set_zero_cost (speed, set_src_cost (const0_rtx, speed));
 264
 265       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 266            mode = (machine_mode)(mode + 1))
 267         init_expmed_one_mode (&all, mode, speed);
 268
 269       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 270         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 271              mode = (machine_mode)(mode + 1))
 272           init_expmed_one_mode (&all, mode, speed);
 273
 274       if (MIN_MODE_VECTOR_INT != VOIDmode)
 275         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 276              mode = (machine_mode)(mode + 1))
 277           init_expmed_one_mode (&all, mode, speed);
 278     }
 279
 280   if (alg_hash_used_p ())
 281     {
 282       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 283       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 284     }
 285   else
 286     set_alg_hash_used_p (true);
 287   default_rtl_profile ();
 288
 289   ggc_free (all.trunc);
 290   ggc_free (all.shift_sub1);
 291   ggc_free (all.shift_sub0);
 292   ggc_free (all.shift_add);
 293   ggc_free (all.shift_mult);
 294   ggc_free (all.shift);
 295   ggc_free (all.wide_trunc);
 296   ggc_free (all.wide_lshr);
 297   ggc_free (all.wide_mult);
 298   ggc_free (all.zext);
 299   ggc_free (all.smod_32);
 300   ggc_free (all.sdiv_32);
 301   ggc_free (all.udiv);
 302   ggc_free (all.sdiv);
 303   ggc_free (all.mult);
 304   ggc_free (all.neg);
 305   ggc_free (all.plus);
 306   ggc_free (all.reg);
 307 }
 308
 309 /* Return an rtx representing minus the value of X.
 310    MODE is the intended mode of the result,
 311    useful if X is a CONST_INT.  */
 312
 313 rtx
 314 negate_rtx (machine_mode mode, rtx x)
 315 {
 316   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 317
 318   if (result == 0)
 319     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 320
 321   return result;
 322 }
 323
 324 /* Adjust bitfield memory MEM so that it points to the first unit of mode
 325    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
 326    If MODE is BLKmode, return a reference to every byte in the bitfield.
 327    Set *NEW_BITNUM to the bit position of the field within the new memory.  */
 328
 329 static rtx
 330 narrow_bit_field_mem (rtx mem, machine_mode mode,
 331                       unsigned HOST_WIDE_INT bitsize,
 332                       unsigned HOST_WIDE_INT bitnum,
 333                       unsigned HOST_WIDE_INT *new_bitnum)
 334 {
 335   if (mode == BLKmode)
 336     {
 337       *new_bitnum = bitnum % BITS_PER_UNIT;
 338       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 339       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 340                             / BITS_PER_UNIT);
 341       return adjust_bitfield_address_size (mem, mode, offset, size);
 342     }
 343   else
 344     {
 345       unsigned int unit = GET_MODE_BITSIZE (mode);
 346       *new_bitnum = bitnum % unit;
 347       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 348       return adjust_bitfield_address (mem, mode, offset);
 349     }
 350 }
 351
 352 /* The caller wants to perform insertion or extraction PATTERN on a
 353    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 354    BITREGION_START and BITREGION_END are as for store_bit_field
 355    and FIELDMODE is the natural mode of the field.
 356
 357    Search for a mode that is compatible with the memory access
 358    restrictions and (where applicable) with a register insertion or
 359    extraction.  Return the new memory on success, storing the adjusted
 360    bit position in *NEW_BITNUM.  Return null otherwise.  */
 361
 362 static rtx
 363 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 364                               rtx op0, HOST_WIDE_INT bitsize,
 365                               HOST_WIDE_INT bitnum,
 366                               unsigned HOST_WIDE_INT bitregion_start,
 367                               unsigned HOST_WIDE_INT bitregion_end,
 368                               machine_mode fieldmode,
 369                               unsigned HOST_WIDE_INT *new_bitnum)
 370 {
 371   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 372                                 bitregion_end, MEM_ALIGN (op0),
 373                                 MEM_VOLATILE_P (op0));
 374   machine_mode best_mode;
 375   if (iter.next_mode (&best_mode))
 376     {
 377       /* We can use a memory in BEST_MODE.  See whether this is true for
 378          any wider modes.  All other things being equal, we prefer to
 379          use the widest mode possible because it tends to expose more
 380          CSE opportunities.  */
 381       if (!iter.prefer_smaller_modes ())
 382         {
 383           /* Limit the search to the mode required by the corresponding
 384              register insertion or extraction instruction, if any.  */
 385           machine_mode limit_mode = word_mode;
 386           extraction_insn insn;
 387           if (get_best_reg_extraction_insn (&insn, pattern,
 388                                             GET_MODE_BITSIZE (best_mode),
 389                                             fieldmode))
 390             limit_mode = insn.field_mode;
 391
 392           machine_mode wider_mode;
 393           while (iter.next_mode (&wider_mode)
 394                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 395             best_mode = wider_mode;
 396         }
 397       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 398                                    new_bitnum);
 399     }
 400   return NULL_RTX;
 401 }
 402
 403 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 404    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 405    offset is then BITNUM / BITS_PER_UNIT.  */
 406
 407 static bool
 408 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
 409                      unsigned HOST_WIDE_INT bitsize,
 410                      machine_mode struct_mode)
 411 {
 412   if (BYTES_BIG_ENDIAN)
 413     return (bitnum % BITS_PER_UNIT == 0
 414             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
 415                 || (bitnum + bitsize) % BITS_PER_WORD == 0));
 416   else
 417     return bitnum % BITS_PER_WORD == 0;
 418 }
 419
 420 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 421    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 422    Return false if the access would touch memory outside the range
 423    BITREGION_START to BITREGION_END for conformance to the C++ memory
 424    model.  */
 425
 426 static bool
 427 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 428                             unsigned HOST_WIDE_INT bitnum,
 429                             machine_mode fieldmode,
 430                             unsigned HOST_WIDE_INT bitregion_start,
 431                             unsigned HOST_WIDE_INT bitregion_end)
 432 {
 433   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 434
 435   /* -fstrict-volatile-bitfields must be enabled and we must have a
 436      volatile MEM.  */
 437   if (!MEM_P (op0)
 438       || !MEM_VOLATILE_P (op0)
 439       || flag_strict_volatile_bitfields <= 0)
 440     return false;
 441
 442   /* Non-integral modes likely only happen with packed structures.
 443      Punt.  */
 444   if (!SCALAR_INT_MODE_P (fieldmode))
 445     return false;
 446
 447   /* The bit size must not be larger than the field mode, and
 448      the field mode must not be larger than a word.  */
 449   if (bitsize > modesize || modesize > BITS_PER_WORD)
 450     return false;
 451
 452   /* Check for cases of unaligned fields that must be split.  */
 453   if (bitnum % BITS_PER_UNIT + bitsize > modesize
 454       || (STRICT_ALIGNMENT
 455           && bitnum % GET_MODE_ALIGNMENT (fieldmode) + bitsize > modesize))
 456     return false;
 457
 458   /* Check for cases where the C++ memory model applies.  */
 459   if (bitregion_end != 0
 460       && (bitnum - bitnum % modesize < bitregion_start
 461           || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
 462     return false;
 463
 464   return true;
 465 }
 466
 467 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 468    bit number BITNUM can be treated as a simple value of mode MODE.  */
 469
 470 static bool
 471 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 472                        unsigned HOST_WIDE_INT bitnum, machine_mode mode)
 473 {
 474   return (MEM_P (op0)
 475           && bitnum % BITS_PER_UNIT == 0
 476           && bitsize == GET_MODE_BITSIZE (mode)
 477           && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
 478               || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
 479                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 480 }
 481 \f
 482 /* Try to use instruction INSV to store VALUE into a field of OP0.
 483    BITSIZE and BITNUM are as for store_bit_field.  */
 484
 485 static bool
 486 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 487                             unsigned HOST_WIDE_INT bitsize,
 488                             unsigned HOST_WIDE_INT bitnum,
 489                             rtx value)
 490 {
 491   struct expand_operand ops[4];
 492   rtx value1;
 493   rtx xop0 = op0;
 494   rtx_insn *last = get_last_insn ();
 495   bool copy_back = false;
 496
 497   machine_mode op_mode = insv->field_mode;
 498   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 499   if (bitsize == 0 || bitsize > unit)
 500     return false;
 501
 502   if (MEM_P (xop0))
 503     /* Get a reference to the first byte of the field.  */
 504     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 505                                  &bitnum);
 506   else
 507     {
 508       /* Convert from counting within OP0 to counting in OP_MODE.  */
 509       if (BYTES_BIG_ENDIAN)
 510         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 511
 512       /* If xop0 is a register, we need it in OP_MODE
 513          to make it acceptable to the format of insv.  */
 514       if (GET_CODE (xop0) == SUBREG)
 515         /* We can't just change the mode, because this might clobber op0,
 516            and we will need the original value of op0 if insv fails.  */
 517         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 518       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 519         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 520     }
 521
 522   /* If the destination is a paradoxical subreg such that we need a
 523      truncate to the inner mode, perform the insertion on a temporary and
 524      truncate the result to the original destination.  Note that we can't
 525      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 526      X) 0)) is (reg:N X).  */
 527   if (GET_CODE (xop0) == SUBREG
 528       && REG_P (SUBREG_REG (xop0))
 529       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 530                                          op_mode))
 531     {
 532       rtx tem = gen_reg_rtx (op_mode);
 533       emit_move_insn (tem, xop0);
 534       xop0 = tem;
 535       copy_back = true;
 536     }
 537
 538   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 539      "backwards" from the size of the unit we are inserting into.
 540      Otherwise, we count bits from the most significant on a
 541      BYTES/BITS_BIG_ENDIAN machine.  */
 542
 543   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 544     bitnum = unit - bitsize - bitnum;
 545
 546   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 547   value1 = value;
 548   if (GET_MODE (value) != op_mode)
 549     {
 550       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 551         {
 552           /* Optimization: Don't bother really extending VALUE
 553              if it has all the bits we will actually use.  However,
 554              if we must narrow it, be sure we do it correctly.  */
 555
 556           if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 557             {
 558               rtx tmp;
 559
 560               tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 561               if (! tmp)
 562                 tmp = simplify_gen_subreg (op_mode,
 563                                            force_reg (GET_MODE (value),
 564                                                       value1),
 565                                            GET_MODE (value), 0);
 566               value1 = tmp;
 567             }
 568           else
 569             value1 = gen_lowpart (op_mode, value1);
 570         }
 571       else if (CONST_INT_P (value))
 572         value1 = gen_int_mode (INTVAL (value), op_mode);
 573       else
 574         /* Parse phase is supposed to make VALUE's data type
 575            match that of the component reference, which is a type
 576            at least as wide as the field; so VALUE should have
 577            a mode that corresponds to that type.  */
 578         gcc_assert (CONSTANT_P (value));
 579     }
 580
 581   create_fixed_operand (&ops[0], xop0);
 582   create_integer_operand (&ops[1], bitsize);
 583   create_integer_operand (&ops[2], bitnum);
 584   create_input_operand (&ops[3], value1, op_mode);
 585   if (maybe_expand_insn (insv->icode, 4, ops))
 586     {
 587       if (copy_back)
 588         convert_move (op0, xop0, true);
 589       return true;
 590     }
 591   delete_insns_since (last);
 592   return false;
 593 }
 594
 595 /* A subroutine of store_bit_field, with the same arguments.  Return true
 596    if the operation could be implemented.
 597
 598    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 599    no other way of implementing the operation.  If FALLBACK_P is false,
 600    return false instead.  */
 601
 602 static bool
 603 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 604                    unsigned HOST_WIDE_INT bitnum,
 605                    unsigned HOST_WIDE_INT bitregion_start,
 606                    unsigned HOST_WIDE_INT bitregion_end,
 607                    machine_mode fieldmode,
 608                    rtx value, bool fallback_p)
 609 {
 610   rtx op0 = str_rtx;
 611   rtx orig_value;
 612
 613   while (GET_CODE (op0) == SUBREG)
 614     {
 615       /* The following line once was done only if WORDS_BIG_ENDIAN,
 616          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 617          meaningful at a much higher level; when structures are copied
 618          between memory and regs, the higher-numbered regs
 619          always get higher addresses.  */
 620       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 621       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 622       int byte_offset = 0;
 623
 624       /* Paradoxical subregs need special handling on big endian machines.  */
 625       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 626         {
 627           int difference = inner_mode_size - outer_mode_size;
 628
 629           if (WORDS_BIG_ENDIAN)
 630             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 631           if (BYTES_BIG_ENDIAN)
 632             byte_offset += difference % UNITS_PER_WORD;
 633         }
 634       else
 635         byte_offset = SUBREG_BYTE (op0);
 636
 637       bitnum += byte_offset * BITS_PER_UNIT;
 638       op0 = SUBREG_REG (op0);
 639     }
 640
 641   /* No action is needed if the target is a register and if the field
 642      lies completely outside that register.  This can occur if the source
 643      code contains an out-of-bounds access to a small array.  */
 644   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 645     return true;
 646
 647   /* Use vec_set patterns for inserting parts of vectors whenever
 648      available.  */
 649   if (VECTOR_MODE_P (GET_MODE (op0))
 650       && !MEM_P (op0)
 651       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 652       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 653       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 654       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 655     {
 656       struct expand_operand ops[3];
 657       machine_mode outermode = GET_MODE (op0);
 658       machine_mode innermode = GET_MODE_INNER (outermode);
 659       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 660       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 661
 662       create_fixed_operand (&ops[0], op0);
 663       create_input_operand (&ops[1], value, innermode);
 664       create_integer_operand (&ops[2], pos);
 665       if (maybe_expand_insn (icode, 3, ops))
 666         return true;
 667     }
 668
 669   /* If the target is a register, overwriting the entire object, or storing
 670      a full-word or multi-word field can be done with just a SUBREG.  */
 671   if (!MEM_P (op0)
 672       && bitsize == GET_MODE_BITSIZE (fieldmode)
 673       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 674           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 675     {
 676       /* Use the subreg machinery either to narrow OP0 to the required
 677          words or to cope with mode punning between equal-sized modes.
 678          In the latter case, use subreg on the rhs side, not lhs.  */
 679       rtx sub;
 680
 681       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 682         {
 683           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 684           if (sub)
 685             {
 686               emit_move_insn (op0, sub);
 687               return true;
 688             }
 689         }
 690       else
 691         {
 692           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 693                                      bitnum / BITS_PER_UNIT);
 694           if (sub)
 695             {
 696               emit_move_insn (sub, value);
 697               return true;
 698             }
 699         }
 700     }
 701
 702   /* If the target is memory, storing any naturally aligned field can be
 703      done with a simple store.  For targets that support fast unaligned
 704      memory, any naturally sized, unit aligned field can be done directly.  */
 705   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
 706     {
 707       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 708       emit_move_insn (op0, value);
 709       return true;
 710     }
 711
 712   /* Make sure we are playing with integral modes.  Pun with subregs
 713      if we aren't.  This must come after the entire register case above,
 714      since that case is valid for any mode.  The following cases are only
 715      valid for integral modes.  */
 716   {
 717     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 718     if (imode != GET_MODE (op0))
 719       {
 720         if (MEM_P (op0))
 721           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
 722         else
 723           {
 724             gcc_assert (imode != BLKmode);
 725             op0 = gen_lowpart (imode, op0);
 726           }
 727       }
 728   }
 729
 730   /* Storing an lsb-aligned field in a register
 731      can be done with a movstrict instruction.  */
 732
 733   if (!MEM_P (op0)
 734       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 735       && bitsize == GET_MODE_BITSIZE (fieldmode)
 736       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 737     {
 738       struct expand_operand ops[2];
 739       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 740       rtx arg0 = op0;
 741       unsigned HOST_WIDE_INT subreg_off;
 742
 743       if (GET_CODE (arg0) == SUBREG)
 744         {
 745           /* Else we've got some float mode source being extracted into
 746              a different float mode destination -- this combination of
 747              subregs results in Severe Tire Damage.  */
 748           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 749                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 750                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 751           arg0 = SUBREG_REG (arg0);
 752         }
 753
 754       subreg_off = bitnum / BITS_PER_UNIT;
 755       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 756         {
 757           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 758
 759           create_fixed_operand (&ops[0], arg0);
 760           /* Shrink the source operand to FIELDMODE.  */
 761           create_convert_operand_to (&ops[1], value, fieldmode, false);
 762           if (maybe_expand_insn (icode, 2, ops))
 763             return true;
 764         }
 765     }
 766
 767   /* Handle fields bigger than a word.  */
 768
 769   if (bitsize > BITS_PER_WORD)
 770     {
 771       /* Here we transfer the words of the field
 772          in the order least significant first.
 773          This is because the most significant word is the one which may
 774          be less than full.
 775          However, only do that if the value is not BLKmode.  */
 776
 777       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 778       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 779       unsigned int i;
 780       rtx_insn *last;
 781
 782       /* This is the mode we must force value to, so that there will be enough
 783          subwords to extract.  Note that fieldmode will often (always?) be
 784          VOIDmode, because that is what store_field uses to indicate that this
 785          is a bit field, but passing VOIDmode to operand_subword_force
 786          is not allowed.  */
 787       fieldmode = GET_MODE (value);
 788       if (fieldmode == VOIDmode)
 789         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 790
 791       last = get_last_insn ();
 792       for (i = 0; i < nwords; i++)
 793         {
 794           /* If I is 0, use the low-order word in both field and target;
 795              if I is 1, use the next to lowest word; and so on.  */
 796           unsigned int wordnum = (backwards
 797                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 798                                   - i - 1
 799                                   : i);
 800           unsigned int bit_offset = (backwards
 801                                      ? MAX ((int) bitsize - ((int) i + 1)
 802                                             * BITS_PER_WORD,
 803                                             0)
 804                                      : (int) i * BITS_PER_WORD);
 805           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 806           unsigned HOST_WIDE_INT new_bitsize =
 807             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 808
 809           /* If the remaining chunk doesn't have full wordsize we have
 810              to make sure that for big endian machines the higher order
 811              bits are used.  */
 812           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 813             value_word = simplify_expand_binop (word_mode, lshr_optab,
 814                                                 value_word,
 815                                                 GEN_INT (BITS_PER_WORD
 816                                                          - new_bitsize),
 817                                                 NULL_RTX, true,
 818                                                 OPTAB_LIB_WIDEN);
 819
 820           if (!store_bit_field_1 (op0, new_bitsize,
 821                                   bitnum + bit_offset,
 822                                   bitregion_start, bitregion_end,
 823                                   word_mode,
 824                                   value_word, fallback_p))
 825             {
 826               delete_insns_since (last);
 827               return false;
 828             }
 829         }
 830       return true;
 831     }
 832
 833   /* If VALUE has a floating-point or complex mode, access it as an
 834      integer of the corresponding size.  This can occur on a machine
 835      with 64 bit registers that uses SFmode for float.  It can also
 836      occur for unaligned float or complex fields.  */
 837   orig_value = value;
 838   if (GET_MODE (value) != VOIDmode
 839       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 840       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 841     {
 842       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 843       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 844     }
 845
 846   /* If OP0 is a multi-word register, narrow it to the affected word.
 847      If the region spans two words, defer to store_split_bit_field.  */
 848   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 849     {
 850       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 851                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 852       gcc_assert (op0);
 853       bitnum %= BITS_PER_WORD;
 854       if (bitnum + bitsize > BITS_PER_WORD)
 855         {
 856           if (!fallback_p)
 857             return false;
 858
 859           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 860                                  bitregion_end, value);
 861           return true;
 862         }
 863     }
 864
 865   /* From here on we can assume that the field to be stored in fits
 866      within a word.  If the destination is a register, it too fits
 867      in a word.  */
 868
 869   extraction_insn insv;
 870   if (!MEM_P (op0)
 871       && get_best_reg_extraction_insn (&insv, EP_insv,
 872                                        GET_MODE_BITSIZE (GET_MODE (op0)),
 873                                        fieldmode)
 874       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 875     return true;
 876
 877   /* If OP0 is a memory, try copying it to a register and seeing if a
 878      cheap register alternative is available.  */
 879   if (MEM_P (op0))
 880     {
 881       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
 882                                         fieldmode)
 883           && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 884         return true;
 885
 886       rtx_insn *last = get_last_insn ();
 887
 888       /* Try loading part of OP0 into a register, inserting the bitfield
 889          into that, and then copying the result back to OP0.  */
 890       unsigned HOST_WIDE_INT bitpos;
 891       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
 892                                                bitregion_start, bitregion_end,
 893                                                fieldmode, &bitpos);
 894       if (xop0)
 895         {
 896           rtx tempreg = copy_to_reg (xop0);
 897           if (store_bit_field_1 (tempreg, bitsize, bitpos,
 898                                  bitregion_start, bitregion_end,
 899                                  fieldmode, orig_value, false))
 900             {
 901               emit_move_insn (xop0, tempreg);
 902               return true;
 903             }
 904           delete_insns_since (last);
 905         }
 906     }
 907
 908   if (!fallback_p)
 909     return false;
 910
 911   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
 912                          bitregion_end, value);
 913   return true;
 914 }
 915
 916 /* Generate code to store value from rtx VALUE
 917    into a bit-field within structure STR_RTX
 918    containing BITSIZE bits starting at bit BITNUM.
 919
 920    BITREGION_START is bitpos of the first bitfield in this region.
 921    BITREGION_END is the bitpos of the ending bitfield in this region.
 922    These two fields are 0, if the C++ memory model does not apply,
 923    or we are not interested in keeping track of bitfield regions.
 924
 925    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.

        The store is carried out in up to three steps:
        1. If strict_volatile_bitfield_p accepts the access, it is emitted
           here, either as a plain move or via store_fixed_bit_field_1,
           and nothing else is tried.
        2. Otherwise, for a MEM with a nonzero BITREGION_START, STR_RTX,
           BITNUM and the region bounds are rebased to the start of the
           bit region.
        3. The store is handed to store_bit_field_1 with FALLBACK_P true.  */
 926
 927 void
 928 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 929                  unsigned HOST_WIDE_INT bitnum,
 930                  unsigned HOST_WIDE_INT bitregion_start,
 931                  unsigned HOST_WIDE_INT bitregion_end,
 932                  machine_mode fieldmode,
 933                  rtx value)
 934 {
 935   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
 936   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
 937                                   bitregion_start, bitregion_end))
 938     {
 939       /* Storing any naturally aligned field can be done with a simple
 940          store.  For targets that support fast unaligned memory, any
 941          naturally sized, unit aligned field can be done directly.  */
 942       if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, fieldmode))
 943         {
 944           str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
 945                                              bitnum / BITS_PER_UNIT);
 946           emit_move_insn (str_rtx, value);
 947         }
 948       else
 949         {
 950           str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
 951                                           &bitnum);
 952           /* Explicitly override the C/C++ memory model; ignore the
 953              bit range so that we can do the access in the mode mandated
 954              by -fstrict-volatile-bitfields instead.  */
 955           store_fixed_bit_field_1 (str_rtx, bitsize, bitnum, value);
 956         }
 957
 958       return;
 959     }
 960
 961   /* Under the C++0x memory model, we must not touch bits outside the
 962      bit region.  Adjust the address to start at the beginning of the
 963      bit region.  */
 964   if (MEM_P (str_rtx) && bitregion_start > 0)
 965     {
 966       machine_mode bestmode;
 967       HOST_WIDE_INT offset, size;
 968
           /* OFFSET below is a whole number of bytes, so the region has to
              start on a byte boundary.  */
 969       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
 970
           /* Rebase BITNUM and the region bounds so that bit 0 is the first
              bit of the region.  SIZE is the number of bytes from the region
              start up to the end of the field, rounded up.  */
 971       offset = bitregion_start / BITS_PER_UNIT;
 972       bitnum -= bitregion_start;
 973       size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
 974       bitregion_end -= bitregion_start;
 975       bitregion_start = 0;
 976       bestmode = get_best_mode (bitsize, bitnum,
 977                                 bitregion_start, bitregion_end,
 978                                 MEM_ALIGN (str_rtx), VOIDmode,
 979                                 MEM_VOLATILE_P (str_rtx));
 980       str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
 981     }
 982
       /* FALLBACK_P is true here, so a false return is not expected and is
          treated as unreachable.  */
 983   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
 984                           bitregion_start, bitregion_end,
 985                           fieldmode, value, true))
 986     gcc_unreachable ();
 987 }
 988 \f
 989 /* Use shifts and boolean operations to store VALUE into a bit field of
 990    width BITSIZE in OP0, starting at bit BITNUM.

        BITREGION_START and BITREGION_END are passed on to get_best_mode
        and, if needed, to store_split_bit_field.

        When OP0 is a MEM it is first narrowed to the mode chosen by
        get_best_mode; if no mode is found (VOIDmode) the store is delegated
        to store_split_bit_field.  A non-MEM OP0 is passed to
        store_fixed_bit_field_1 unchanged.  */
 991
 992 static void
 993 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 994                        unsigned HOST_WIDE_INT bitnum,
 995                        unsigned HOST_WIDE_INT bitregion_start,
 996                        unsigned HOST_WIDE_INT bitregion_end,
 997                        rtx value)
 998 {
 999   /* There is a case not handled here:
1000      a structure with a known alignment of just a halfword
1001      and a field split across two aligned halfwords within the structure.
1002      Or likewise a structure with a known alignment of just a byte
1003      and a field split across two bytes.
1004      Such cases are not supposed to be able to occur.  */
1005
1006   if (MEM_P (op0))
1007     {
           /* Start from OP0's own mode, but use word_mode instead when that
              mode has a bitsize of 0 or is wider than word_mode.  The result
              is handed to get_best_mode (NOTE(review): presumably as the
              widest mode it may pick -- confirm against get_best_mode).  */
1008       machine_mode mode = GET_MODE (op0);
1009       if (GET_MODE_BITSIZE (mode) == 0
1010           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1011         mode = word_mode;
1012       mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1013                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1014
1015       if (mode == VOIDmode)
1016         {
1017           /* The only way this should occur is if the field spans word
1018              boundaries.  */
1019           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1020                                  bitregion_end, value);
1021           return;
1022         }
1023
           /* Re-express the access as a MODE-sized MEM; BITNUM is passed by
              address so that it can be updated to match the new OP0.  */
1024       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1025     }
1026
1027   store_fixed_bit_field_1 (op0, bitsize, bitnum, value);
1028 }
1029
1030 /* Helper function for store_fixed_bit_field, stores
1031    the bit field always using the MODE of OP0.

        OP0 must have a scalar integer mode (this is asserted).  On
        BYTES_BIG_ENDIAN targets BITNUM is taken as a distance from the msb
        and is converted below into a distance from the lsb.

        VALUE is reduced to BITSIZE bits where necessary and shifted into
        place.  OP0 is then updated by ANDing out the field and IORing in
        the shifted value.  For a constant VALUE the AND is omitted when
        every field bit is one, and the IOR is omitted when every field bit
        is zero.  */
1032
1033 static void
1034 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1035                          unsigned HOST_WIDE_INT bitnum,
1036                          rtx value)
1037 {
1038   machine_mode mode;
1039   rtx temp;
1040   int all_zero = 0;  /* Set if constant VALUE has no field bit set.  */
1041   int all_one = 0;  /* Set if constant VALUE has every field bit set.  */
1042
1043   mode = GET_MODE (op0);
1044   gcc_assert (SCALAR_INT_MODE_P (mode));
1045
1046   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1047      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1048
1049   if (BYTES_BIG_ENDIAN)
1050     /* BITNUM is the distance between our msb
1051        and that of the containing datum.
1052        Convert it to the distance from the lsb.  */
1053     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1054
1055   /* Now BITNUM is always the distance between our lsb
1056      and that of OP0.  */
1057
1058   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1059      we must first convert its mode to MODE.  */
1060
1061   if (CONST_INT_P (value))
1062     {
1063       unsigned HOST_WIDE_INT v = UINTVAL (value);
1064
           /* Keep only the low BITSIZE bits.  The test avoids shifting by
              the full width of V's type.  */
1065       if (bitsize < HOST_BITS_PER_WIDE_INT)
1066         v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;
1067
           /* Record the two cases that let one of the logical operations
              below be skipped.  */
1068       if (v == 0)
1069         all_zero = 1;
1070       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1071                 && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
1072                || (bitsize == HOST_BITS_PER_WIDE_INT
1073                    && v == (unsigned HOST_WIDE_INT) -1))
1074         all_one = 1;
1075
1076       value = lshift_value (mode, v, bitnum);
1077     }
1078   else
1079     {
           /* Explicit masking is skipped when VALUE's own mode is exactly
              BITSIZE bits wide, or when the field ends at the top of MODE.
              NOTE(review): presumably the unsigned conversion below leaves
              no stray high bits in the first case, and the left shift
              discards them in the second -- confirm.  */
1080       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1081                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1082
1083       if (GET_MODE (value) != mode)
1084         value = convert_to_mode (mode, value, 1);
1085
1086       if (must_and)
1087         value = expand_binop (mode, and_optab, value,
1088                               mask_rtx (mode, 0, bitsize, 0),
1089                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1090       if (bitnum > 0)
1091         value = expand_shift (LSHIFT_EXPR, mode, value,
1092                               bitnum, NULL_RTX, 1);
1093     }
1094
1095   /* Now clear the chosen bits in OP0,
1096      except that if VALUE is -1 we need not bother.  */
1097   /* We keep the intermediates in registers to allow CSE to combine
1098      consecutive bitfield assignments.  */
1099
1100   temp = force_reg (mode, op0);
1101
1102   if (! all_one)
1103     {
1104       temp = expand_binop (mode, and_optab, temp,
1105                            mask_rtx (mode, bitnum, bitsize, 1),
1106                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1107       temp = force_reg (mode, temp);
1108     }
1109
1110   /* Now logical-or VALUE into OP0, unless it is zero.  */
1111
1112   if (! all_zero)
1113     {
1114       temp = expand_binop (mode, ior_optab, temp, value,
1115                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1116       temp = force_reg (mode, temp);
1117     }
1118
       /* Write the combined result back, unless TEMP already is OP0.  The
          move is emitted to a copy of the OP0 rtx.  */
1119   if (op0 != temp)
1120     {
1121       op0 = copy_rtx (op0);
1122       emit_move_insn (op0, temp);
1123     }
1124 }
1125 \f
1126 /* Store a bit field that is split across multiple accessible memory objects.
1127
1128    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1129    BITSIZE is the field width; BITPOS the position of its first bit
1130    (within the word).
1131    VALUE is the value to store.
1132
1133    This does not yet handle fields wider than BITS_PER_WORD.

        BITREGION_START and BITREGION_END are passed through to
        store_fixed_bit_field; a nonzero BITREGION_END is also used to
        shrink UNIT for destinations that are not a REG or a SUBREG of a
        REG.

        The field is stored piecewise: each loop iteration extracts
        THISSIZE bits of VALUE and stores them with store_fixed_bit_field.
        Each piece stays within one UNIT-bit chunk of the destination.  */
1134
1135 static void
1136 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1137                        unsigned HOST_WIDE_INT bitpos,
1138                        unsigned HOST_WIDE_INT bitregion_start,
1139                        unsigned HOST_WIDE_INT bitregion_end,
1140                        rtx value)
1141 {
1142   unsigned int unit;  /* Size in bits of the chunks a piece may not cross.  */
1143   unsigned int bitsdone = 0;  /* Bits of the field handled so far.  */
1144
1145   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1146      much at a time.  */
1147   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1148     unit = BITS_PER_WORD;
1149   else
1150     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1151
1152   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1153      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1154      again, and we will mutually recurse forever.  */
1155   if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1156     unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1157
1158   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1159      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1160      that VALUE might be a floating-point constant.  */
1161   if (CONSTANT_P (value) && !CONST_INT_P (value))
1162     {
1163       rtx word = gen_lowpart_common (word_mode, value);
1164
1165       if (word && (value != word))
1166         value = word;
1167       else
1168         value = gen_lowpart_common (word_mode,
1169                                     force_reg (GET_MODE (value) != VOIDmode
1170                                                ? GET_MODE (value)
1171                                                : word_mode, value));
1172     }
1173
1174   while (bitsdone < bitsize)
1175     {
1176       unsigned HOST_WIDE_INT thissize;
1177       rtx part, word;
1178       unsigned HOST_WIDE_INT thispos;
1179       unsigned HOST_WIDE_INT offset;
1180
           /* Locate the next bit to store: OFFSET is the index of the UNIT
              that contains it and THISPOS its bit position in that UNIT.  */
1181       offset = (bitpos + bitsdone) / unit;
1182       thispos = (bitpos + bitsdone) % unit;
1183
1184       /* When region of bytes we can touch is restricted, decrease
1185          UNIT close to the end of the region as needed.  If op0 is a REG
1186          or SUBREG of REG, don't do this, as there can't be data races
1187          on a register and we can expand shorter code in some cases.  */
1188       if (bitregion_end
1189           && unit > BITS_PER_UNIT
1190           && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1191           && !REG_P (op0)
1192           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1193         {
               /* Retry the same piece with half the UNIT.  BITSDONE is
                  unchanged, and OFFSET and THISPOS are recomputed at the
                  top of the loop.  */
1194           unit = unit / 2;
1195           continue;
1196         }
1197
1198       /* THISSIZE must not overrun a word boundary.  Otherwise,
1199          store_fixed_bit_field will call us again, and we will mutually
1200          recurse forever.  */
1201       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1202       thissize = MIN (thissize, unit - thispos);
1203
1204       if (BYTES_BIG_ENDIAN)
1205         {
1206           /* Fetch successively less significant portions.  */
1207           if (CONST_INT_P (value))
1208             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1209                              >> (bitsize - bitsdone - thissize))
1210                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1211           else
1212             {
1213               int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1214               /* The args are chosen so that the last part includes the
1215                  lsb.  Give extract_bit_field the value it needs (with
1216                  endianness compensation) to fetch the piece we want.  */
1217               part = extract_fixed_bit_field (word_mode, value, thissize,
1218                                               total_bits - bitsize + bitsdone,
1219                                               NULL_RTX, 1);
1220             }
1221         }
1222       else
1223         {
1224           /* Fetch successively more significant portions.  */
1225           if (CONST_INT_P (value))
1226             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1227                              >> bitsdone)
1228                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1229           else
1230             part = extract_fixed_bit_field (word_mode, value, thissize,
1231                                             bitsdone, NULL_RTX, 1);
1232         }
1233
1234       /* If OP0 is a register, then handle OFFSET here.
1235
1236          When handling multiword bitfields, extract_bit_field may pass
1237          down a word_mode SUBREG of a larger REG for a bitfield that actually
1238          crosses a word boundary.  Thus, for a SUBREG, we must find
1239          the current word starting from the base register.  */
1240       if (GET_CODE (op0) == SUBREG)
1241         {
1242           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1243                             + (offset * unit / BITS_PER_WORD);
1244           machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
               /* An inner register narrower than a word has only word 0;
                  any other word is marked out of bounds with const0_rtx.  */
1245           if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1246             word = word_offset ? const0_rtx : op0;
1247           else
1248             word = operand_subword_force (SUBREG_REG (op0), word_offset,
1249                                           GET_MODE (SUBREG_REG (op0)));
               /* Keep only the UNIT index within the selected word (assumes
                  BITS_PER_WORD / UNIT is a power of two -- TODO confirm).  */
1250           offset &= BITS_PER_WORD / unit - 1;
1251         }
1252       else if (REG_P (op0))
1253         {
1254           machine_mode op0_mode = GET_MODE (op0);
               /* Same treatment as the SUBREG case, applied to OP0 itself.  */
1255           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1256             word = offset ? const0_rtx : op0;
1257           else
1258             word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1259                                           GET_MODE (op0));
1260           offset &= BITS_PER_WORD / unit - 1;
1261         }
1262       else
1263         word = op0;
1264
1265       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1266          it is just an out-of-bounds access.  Ignore it.  */
1267       if (word != const0_rtx)
1268         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1269                                bitregion_start, bitregion_end, part);
1270       bitsdone += thissize;
1271     }
1272 }
1273 \f
1274 /* A subroutine of extract_bit_field_1 that converts return value X
1275    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1276    to extract_bit_field.

        X is returned unchanged if it already has mode TMODE or MODE.
        Otherwise the result has mode TMODE; UNSIGNEDP is passed on to
        convert_to_mode for the conversion.  */
1277
1278 static rtx
1279 convert_extracted_bit_field (rtx x, machine_mode mode,
1280                              machine_mode tmode, bool unsignedp)
1281 {
1282   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1283     return x;
1284
1285   /* If TMODE is not a scalar integer mode, first convert X to the
1286      integer mode of TMODE's size, force it into a register, and then
1287      access that register in TMODE via gen_lowpart.  */
1288   if (!SCALAR_INT_MODE_P (tmode))
1289     {
1290       machine_mode smode;
1291
1292       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1293       x = convert_to_mode (smode, x, unsignedp);
1294       x = force_reg (smode, x);
1295       return gen_lowpart (tmode, x);
1296     }
1297
1298   return convert_to_mode (tmode, x, unsignedp);
1299 }
1300
1301 /* Try to use an ext(z)v pattern to extract a field from OP0.
1302    Return the extracted value on success, otherwise return null.
1303    EXT_MODE is the mode of the extraction and the other arguments
1304    are as for extract_bit_field.

        EXTV describes the pattern to use: its field_mode gives EXT_MODE,
        its struct_mode is used to narrow a MEM operand, and its icode is
        the instruction that is expanded.

        Null is returned if BITSIZE is zero or exceeds the bitsize of
        EXT_MODE, if OP0 is a SUBREG whose mode is not EXT_MODE, or if
        maybe_expand_insn fails.  */
1305
1306 static rtx
1307 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1308                               unsigned HOST_WIDE_INT bitsize,
1309                               unsigned HOST_WIDE_INT bitnum,
1310                               int unsignedp, rtx target,
1311                               machine_mode mode, machine_mode tmode)
1312 {
1313   struct expand_operand ops[4];
       /* SPEC_TARGET is the target asked for by the caller, or the TMODE
          register allocated below when none was supplied.
          SPEC_TARGET_SUBREG is set to the EXT_MODE lowpart of SPEC_TARGET
          when EXT_MODE has greater precision than SPEC_TARGET's mode.  */
1314   rtx spec_target = target;
1315   rtx spec_target_subreg = 0;
1316   machine_mode ext_mode = extv->field_mode;
1317   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1318
1319   if (bitsize == 0 || unit < bitsize)
1320     return NULL_RTX;
1321
1322   if (MEM_P (op0))
1323     /* Get a reference to the first byte of the field.  */
1324     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1325                                 &bitnum);
1326   else
1327     {
1328       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1329       if (BYTES_BIG_ENDIAN)
1330         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1331
1332       /* If op0 is a register, we need it in EXT_MODE to make it
1333          acceptable to the format of ext(z)v.  */
1334       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1335         return NULL_RTX;
1336       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1337         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1338     }
1339
1340   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1341      "backwards" from the size of the unit we are extracting from.
1342      Otherwise, we count bits from the most significant on a
1343      BYTES/BITS_BIG_ENDIAN machine.  */
1344
1345   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1346     bitnum = unit - bitsize - bitnum;
1347
       /* No target supplied: allocate a TMODE register and treat it as the
          requested target.  */
1348   if (target == 0)
1349     target = spec_target = gen_reg_rtx (tmode);
1350
1351   if (GET_MODE (target) != ext_mode)
1352     {
1353       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1354          between the mode of the extraction (word_mode) and the target
1355          mode.  Instead, create a temporary and use convert_move to set
1356          the target.  */
1357       if (REG_P (target)
1358           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1359         {
1360           target = gen_lowpart (ext_mode, target);
1361           if (GET_MODE_PRECISION (ext_mode)
1362               > GET_MODE_PRECISION (GET_MODE (spec_target)))
1363             spec_target_subreg = target;
1364         }
1365       else
1366         target = gen_reg_rtx (ext_mode);
1367     }
1368
       /* Operands of the pattern as set up here: 0 is the output in
          EXT_MODE, 1 is OP0, 2 is the field width and 3 is the bit
          position.  */
1369   create_output_operand (&ops[0], target, ext_mode);
1370   create_fixed_operand (&ops[1], op0);
1371   create_integer_operand (&ops[2], bitsize);
1372   create_integer_operand (&ops[3], bitnum);
1373   if (maybe_expand_insn (extv->icode, 4, ops))
1374     {
           /* Use the output operand's final value.  If that is the
              requested target, or the wider lowpart recorded for it,
              return the requested target.  Otherwise convert the result
              to MODE or TMODE.  */
1375       target = ops[0].value;
1376       if (target == spec_target)
1377         return target;
1378       if (target == spec_target_subreg)
1379         return spec_target;
1380       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1381     }
1382   return NULL_RTX;
1383 }
1384
1385 /* A subroutine of extract_bit_field, with the same arguments.
1386    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1387    if we can find no other means of implementing the operation.
1388    If FALLBACK_P is false, return NULL instead.  */
1389
1390 static rtx
1391 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1392                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1393                      machine_mode mode, machine_mode tmode,
1394                      bool fallback_p)
1395 {
1396   rtx op0 = str_rtx;
1397   machine_mode int_mode;
1398   machine_mode mode1;
1399
1400   if (tmode == VOIDmode)
1401     tmode = mode;
1402
1403   while (GET_CODE (op0) == SUBREG)
1404     {
1405       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1406       op0 = SUBREG_REG (op0);
1407     }
1408
1409   /* If we have an out-of-bounds access to a register, just return an
1410      uninitialized register of the required mode.  This can occur if the
1411      source code contains an out-of-bounds access to a small array.  */
1412   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1413     return gen_reg_rtx (tmode);
1414
1415   if (REG_P (op0)
1416       && mode == GET_MODE (op0)
1417       && bitnum == 0
1418       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1419     {
1420       /* We're trying to extract a full register from itself.  */
1421       return op0;
1422     }
1423
1424   /* See if we can get a better vector mode before extracting.  */
1425   if (VECTOR_MODE_P (GET_MODE (op0))
1426       && !MEM_P (op0)
1427       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1428     {
1429       machine_mode new_mode;
1430
1431       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1432         new_mode = MIN_MODE_VECTOR_FLOAT;
1433       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1434         new_mode = MIN_MODE_VECTOR_FRACT;
1435       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1436         new_mode = MIN_MODE_VECTOR_UFRACT;
1437       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1438         new_mode = MIN_MODE_VECTOR_ACCUM;
1439       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1440         new_mode = MIN_MODE_VECTOR_UACCUM;
1441       else
1442         new_mode = MIN_MODE_VECTOR_INT;
1443
1444       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1445         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1446             && targetm.vector_mode_supported_p (new_mode))
1447           break;
1448       if (new_mode != VOIDmode)
1449         op0 = gen_lowpart (new_mode, op0);
1450     }
1451
1452   /* Use vec_extract patterns for extracting parts of vectors whenever
1453      available.  */
1454   if (VECTOR_MODE_P (GET_MODE (op0))
1455       && !MEM_P (op0)
1456       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1457       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1458           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1459     {
1460       struct expand_operand ops[3];
1461       machine_mode outermode = GET_MODE (op0);
1462       machine_mode innermode = GET_MODE_INNER (outermode);
1463       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1464       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1465
1466       create_output_operand (&ops[0], target, innermode);
1467       create_input_operand (&ops[1], op0, outermode);
1468       create_integer_operand (&ops[2], pos);
1469       if (maybe_expand_insn (icode, 3, ops))
1470         {
1471           target = ops[0].value;
1472           if (GET_MODE (target) != mode)
1473             return gen_lowpart (tmode, target);
1474           return target;
1475         }
1476     }
1477
1478   /* Make sure we are playing with integral modes.  Pun with subregs
1479      if we aren't.  */
1480   {
1481     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1482     if (imode != GET_MODE (op0))
1483       {
1484         if (MEM_P (op0))
1485           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1486         else if (imode != BLKmode)
1487           {
1488             op0 = gen_lowpart (imode, op0);
1489
1490             /* If we got a SUBREG, force it into a register since we
1491                aren't going to be able to do another SUBREG on it.  */
1492             if (GET_CODE (op0) == SUBREG)
1493               op0 = force_reg (imode, op0);
1494           }
1495         else if (REG_P (op0))
1496           {
1497             rtx reg, subreg;
1498             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1499                                             MODE_INT);
1500             reg = gen_reg_rtx (imode);
1501             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1502             emit_move_insn (subreg, op0);
1503             op0 = reg;
1504             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1505           }
1506         else
1507           {
1508             HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1509             rtx mem = assign_stack_temp (GET_MODE (op0), size);
1510             emit_move_insn (mem, op0);
1511             op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1512           }
1513       }
1514   }
1515
1516   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1517      If that's wrong, the solution is to test for it and set TARGET to 0
1518      if needed.  */
1519
1520   /* Get the mode of the field to use for atomic access or subreg
1521      conversion.  */
1522   mode1 = mode;
1523   if (SCALAR_INT_MODE_P (tmode))
1524     {
1525       machine_mode try_mode = mode_for_size (bitsize,
1526                                                   GET_MODE_CLASS (tmode), 0);
1527       if (try_mode != BLKmode)
1528         mode1 = try_mode;
1529     }
1530   gcc_assert (mode1 != BLKmode);
1531
1532   /* Extraction of a full MODE1 value can be done with a subreg as long
1533      as the least significant bit of the value is the least significant
1534      bit of either OP0 or a word of OP0.  */
1535   if (!MEM_P (op0)
1536       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1537       && bitsize == GET_MODE_BITSIZE (mode1)
1538       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1539     {
1540       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1541                                      bitnum / BITS_PER_UNIT);
1542       if (sub)
1543         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1544     }
1545
1546   /* Extraction of a full MODE1 value can be done with a load as long as
1547      the field is on a byte boundary and is sufficiently aligned.  */
1548   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1549     {
1550       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1551       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1552     }
1553
1554   /* Handle fields bigger than a word.  */
1555
1556   if (bitsize > BITS_PER_WORD)
1557     {
1558       /* Here we transfer the words of the field
1559          in the order least significant first.
1560          This is because the most significant word is the one which may
1561          be less than full.  */
1562
1563       unsigned int backwards = WORDS_BIG_ENDIAN;
1564       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1565       unsigned int i;
1566       rtx_insn *last;
1567
1568       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1569         target = gen_reg_rtx (mode);
1570
1571       /* Indicate for flow that the entire target reg is being set.  */
1572       emit_clobber (target);
1573
1574       last = get_last_insn ();
1575       for (i = 0; i < nwords; i++)
1576         {
1577           /* If I is 0, use the low-order word in both field and target;
1578              if I is 1, use the next to lowest word; and so on.  */
1579           /* Word number in TARGET to use.  */
1580           unsigned int wordnum
1581             = (backwards
1582                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1583                : i);
1584           /* Offset from start of field in OP0.  */
1585           unsigned int bit_offset = (backwards
1586                                      ? MAX ((int) bitsize - ((int) i + 1)
1587                                             * BITS_PER_WORD,
1588                                             0)
1589                                      : (int) i * BITS_PER_WORD);
1590           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1591           rtx result_part
1592             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1593                                              bitsize - i * BITS_PER_WORD),
1594                                    bitnum + bit_offset, 1, target_part,
1595                                    mode, word_mode, fallback_p);
1596
1597           gcc_assert (target_part);
1598           if (!result_part)
1599             {
1600               delete_insns_since (last);
1601               return NULL;
1602             }
1603
1604           if (result_part != target_part)
1605             emit_move_insn (target_part, result_part);
1606         }
1607
1608       if (unsignedp)
1609         {
1610           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1611              need to be zero'd out.  */
1612           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1613             {
1614               unsigned int i, total_words;
1615
1616               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1617               for (i = nwords; i < total_words; i++)
1618                 emit_move_insn
1619                   (operand_subword (target,
1620                                     backwards ? total_words - i - 1 : i,
1621                                     1, VOIDmode),
1622                    const0_rtx);
1623             }
1624           return target;
1625         }
1626
1627       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1628       target = expand_shift (LSHIFT_EXPR, mode, target,
1629                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1630       return expand_shift (RSHIFT_EXPR, mode, target,
1631                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1632     }
1633
1634   /* If OP0 is a multi-word register, narrow it to the affected word.
1635      If the region spans two words, defer to extract_split_bit_field.  */
1636   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1637     {
1638       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1639                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1640       bitnum %= BITS_PER_WORD;
1641       if (bitnum + bitsize > BITS_PER_WORD)
1642         {
1643           if (!fallback_p)
1644             return NULL_RTX;
1645           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1646           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1647         }
1648     }
1649
1650   /* From here on we know the desired field is smaller than a word.
1651      If OP0 is a register, it too fits within a word.  */
1652   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1653   extraction_insn extv;
1654   if (!MEM_P (op0)
1655       /* ??? We could limit the structure size to the part of OP0 that
1656          contains the field, with appropriate checks for endianness
1657          and TRULY_NOOP_TRUNCATION.  */
1658       && get_best_reg_extraction_insn (&extv, pattern,
1659                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1660                                        tmode))
1661     {
1662       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1663                                                  unsignedp, target, mode,
1664                                                  tmode);
1665       if (result)
1666         return result;
1667     }
1668
1669   /* If OP0 is a memory, try copying it to a register and seeing if a
1670      cheap register alternative is available.  */
1671   if (MEM_P (op0))
1672     {
1673       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1674                                         tmode))
1675         {
1676           rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1677                                                      bitnum, unsignedp,
1678                                                      target, mode,
1679                                                      tmode);
1680           if (result)
1681             return result;
1682         }
1683
1684       rtx_insn *last = get_last_insn ();
1685
1686       /* Try loading part of OP0 into a register and extracting the
1687          bitfield from that.  */
1688       unsigned HOST_WIDE_INT bitpos;
1689       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1690                                                0, 0, tmode, &bitpos);
1691       if (xop0)
1692         {
1693           xop0 = copy_to_reg (xop0);
1694           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1695                                             unsignedp, target,
1696                                             mode, tmode, false);
1697           if (result)
1698             return result;
1699           delete_insns_since (last);
1700         }
1701     }
1702
1703   if (!fallback_p)
1704     return NULL;
1705
1706   /* Find a correspondingly-sized integer field, so we can apply
1707      shifts and masks to it.  */
1708   int_mode = int_mode_for_mode (tmode);
1709   if (int_mode == BLKmode)
1710     int_mode = int_mode_for_mode (mode);
1711   /* Should probably push op0 out to memory and then do a load.  */
1712   gcc_assert (int_mode != BLKmode);
1713
1714   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1715                                     target, unsignedp);
1716   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1717 }
1718
1719 /* Generate code to extract a byte-field from STR_RTX
1720    containing BITSIZE bits, starting at BITNUM,
1721    and put it in TARGET if possible (if TARGET is nonzero).
1722    Regardless of TARGET, we return the rtx for where the value is placed.
1723
1724    STR_RTX is the structure containing the byte (a REG or MEM).
1725    UNSIGNEDP is nonzero if this is an unsigned bit field.
1726    MODE is the natural mode of the field value once extracted.
1727    TMODE is the mode the caller would like the value to have;
1728    but the value may be returned with type MODE instead.
1729
1730    If a TARGET is specified and we can store in it at no extra cost,
1731    we do so, and return TARGET.
1732    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1733    if they are equally easy.  */
1734
1735 rtx
1736 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1737                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1738                    machine_mode mode, machine_mode tmode)
1739 {
1740   machine_mode mode1;
1741
1742   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1743   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1744     mode1 = GET_MODE (str_rtx);
1745   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1746     mode1 = GET_MODE (target);
1747   else
1748     mode1 = tmode;
1749
1750   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1751     {
1752       rtx result;
1753
1754       /* Extraction of a full MODE1 value can be done with a load as long as
1755          the field is on a byte boundary and is sufficiently aligned.  */
1756       if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, mode1))
1757         result = adjust_bitfield_address (str_rtx, mode1,
1758                                           bitnum / BITS_PER_UNIT);
1759       else
1760         {
1761           str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1762                                           &bitnum);
1763           result = extract_fixed_bit_field_1 (mode, str_rtx, bitsize, bitnum,
1764                                               target, unsignedp);
1765         }
1766
1767       return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1768     }
1769
1770   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1771                               target, mode, tmode, true);
1772 }
1773 \f
1774 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1775    from bit BITNUM of OP0.
1776
1777    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1778    If TARGET is nonzero, attempts to store the value there
1779    and return TARGET, but this is not guaranteed.
1780    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1781
1782 static rtx
1783 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1784                          unsigned HOST_WIDE_INT bitsize,
1785                          unsigned HOST_WIDE_INT bitnum, rtx target,
1786                          int unsignedp)
1787 {
1788   if (MEM_P (op0))
1789     {
1790       machine_mode mode
1791         = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1792                          MEM_VOLATILE_P (op0));
1793
1794       if (mode == VOIDmode)
1795         /* The only way this should occur is if the field spans word
1796            boundaries.  */
1797         return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1798
1799       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1800     }
1801
1802   return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1803                                     target, unsignedp);
1804 }
1805
1806 /* Helper function for extract_fixed_bit_field, extracts
1807    the bit field always using the MODE of OP0.  */
1808
1809 static rtx
1810 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1811                            unsigned HOST_WIDE_INT bitsize,
1812                            unsigned HOST_WIDE_INT bitnum, rtx target,
1813                            int unsignedp)
1814 {
1815   machine_mode mode = GET_MODE (op0);
1816   gcc_assert (SCALAR_INT_MODE_P (mode));
1817
1818   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1819      for invalid input, such as extract equivalent of f5 from
1820      gcc.dg/pr48335-2.c.  */
1821
1822   if (BYTES_BIG_ENDIAN)
1823     /* BITNUM is the distance between our msb and that of OP0.
1824        Convert it to the distance from the lsb.  */
1825     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1826
1827   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1828      We have reduced the big-endian case to the little-endian case.  */
1829
1830   if (unsignedp)
1831     {
1832       if (bitnum)
1833         {
1834           /* If the field does not already start at the lsb,
1835              shift it so it does.  */
1836           /* Maybe propagate the target for the shift.  */
1837           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1838           if (tmode != mode)
1839             subtarget = 0;
1840           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1841         }
1842       /* Convert the value to the desired mode.  */
1843       if (mode != tmode)
1844         op0 = convert_to_mode (tmode, op0, 1);
1845
1846       /* Unless the msb of the field used to be the msb when we shifted,
1847          mask out the upper bits.  */
1848
1849       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1850         return expand_binop (GET_MODE (op0), and_optab, op0,
1851                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1852                              target, 1, OPTAB_LIB_WIDEN);
1853       return op0;
1854     }
1855
1856   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1857      then arithmetic-shift its lsb to the lsb of the word.  */
1858   op0 = force_reg (mode, op0);
1859
1860   /* Find the narrowest integer mode that contains the field.  */
1861
1862   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1863        mode = GET_MODE_WIDER_MODE (mode))
1864     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1865       {
1866         op0 = convert_to_mode (mode, op0, 0);
1867         break;
1868       }
1869
1870   if (mode != tmode)
1871     target = 0;
1872
1873   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1874     {
1875       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1876       /* Maybe propagate the target for the shift.  */
1877       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1878       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1879     }
1880
1881   return expand_shift (RSHIFT_EXPR, mode, op0,
1882                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1883 }
1884
1885 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1886    VALUE << BITPOS.  */
1887
1888 static rtx
1889 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
1890               int bitpos)
1891 {
1892   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
1893 }
1894 \f
1895 /* Extract a bit field that is split across two words
1896    and return an RTX for the result.
1897
1898    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1899    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1900    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1901
1902 static rtx
1903 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1904                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1905 {
1906   unsigned int unit;
1907   unsigned int bitsdone = 0;
1908   rtx result = NULL_RTX;
1909   int first = 1;
1910
1911   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1912      much at a time.  */
1913   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1914     unit = BITS_PER_WORD;
1915   else
1916     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1917
1918   while (bitsdone < bitsize)
1919     {
1920       unsigned HOST_WIDE_INT thissize;
1921       rtx part, word;
1922       unsigned HOST_WIDE_INT thispos;
1923       unsigned HOST_WIDE_INT offset;
1924
1925       offset = (bitpos + bitsdone) / unit;
1926       thispos = (bitpos + bitsdone) % unit;
1927
1928       /* THISSIZE must not overrun a word boundary.  Otherwise,
1929          extract_fixed_bit_field will call us again, and we will mutually
1930          recurse forever.  */
1931       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1932       thissize = MIN (thissize, unit - thispos);
1933
1934       /* If OP0 is a register, then handle OFFSET here.
1935
1936          When handling multiword bitfields, extract_bit_field may pass
1937          down a word_mode SUBREG of a larger REG for a bitfield that actually
1938          crosses a word boundary.  Thus, for a SUBREG, we must find
1939          the current word starting from the base register.  */
1940       if (GET_CODE (op0) == SUBREG)
1941         {
1942           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1943           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1944                                         GET_MODE (SUBREG_REG (op0)));
1945           offset = 0;
1946         }
1947       else if (REG_P (op0))
1948         {
1949           word = operand_subword_force (op0, offset, GET_MODE (op0));
1950           offset = 0;
1951         }
1952       else
1953         word = op0;
1954
1955       /* Extract the parts in bit-counting order,
1956          whose meaning is determined by BYTES_PER_UNIT.
1957          OFFSET is in UNITs, and UNIT is in bits.  */
1958       part = extract_fixed_bit_field (word_mode, word, thissize,
1959                                       offset * unit + thispos, 0, 1);
1960       bitsdone += thissize;
1961
1962       /* Shift this part into place for the result.  */
1963       if (BYTES_BIG_ENDIAN)
1964         {
1965           if (bitsize != bitsdone)
1966             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1967                                  bitsize - bitsdone, 0, 1);
1968         }
1969       else
1970         {
1971           if (bitsdone != thissize)
1972             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1973                                  bitsdone - thissize, 0, 1);
1974         }
1975
1976       if (first)
1977         result = part;
1978       else
1979         /* Combine the parts with bitwise or.  This works
1980            because we extracted each part as an unsigned bit field.  */
1981         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1982                                OPTAB_LIB_WIDEN);
1983
1984       first = 0;
1985     }
1986
1987   /* Unsigned bit field: we are done.  */
1988   if (unsignedp)
1989     return result;
1990   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1991   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1992                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
1993   return expand_shift (RSHIFT_EXPR, word_mode, result,
1994                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
1995 }
1996 \f
1997 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1998    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
1999    MODE, fill the upper bits with zeros.  Fail if the layout of either
2000    mode is unknown (as for CC modes) or if the extraction would involve
2001    unprofitable mode punning.  Return the value on success, otherwise
2002    return null.
2003
2004    This is different from gen_lowpart* in these respects:
2005
2006      - the returned value must always be considered an rvalue
2007
2008      - when MODE is wider than SRC_MODE, the extraction involves
2009        a zero extension
2010
2011      - when MODE is smaller than SRC_MODE, the extraction involves
2012        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2013
2014    In other words, this routine performs a computation, whereas the
2015    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2016    operations.  */
2017
2018 rtx
2019 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2020 {
2021   machine_mode int_mode, src_int_mode;
2022
2023   if (mode == src_mode)
2024     return src;
2025
2026   if (CONSTANT_P (src))
2027     {
2028       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2029          fails, it will happily create (subreg (symbol_ref)) or similar
2030          invalid SUBREGs.  */
2031       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2032       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2033       if (ret)
2034         return ret;
2035
2036       if (GET_MODE (src) == VOIDmode
2037           || !validate_subreg (mode, src_mode, src, byte))
2038         return NULL_RTX;
2039
2040       src = force_reg (GET_MODE (src), src);
2041       return gen_rtx_SUBREG (mode, src, byte);
2042     }
2043
2044   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2045     return NULL_RTX;
2046
2047   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2048       && MODES_TIEABLE_P (mode, src_mode))
2049     {
2050       rtx x = gen_lowpart_common (mode, src);
2051       if (x)
2052         return x;
2053     }
2054
2055   src_int_mode = int_mode_for_mode (src_mode);
2056   int_mode = int_mode_for_mode (mode);
2057   if (src_int_mode == BLKmode || int_mode == BLKmode)
2058     return NULL_RTX;
2059
2060   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2061     return NULL_RTX;
2062   if (!MODES_TIEABLE_P (int_mode, mode))
2063     return NULL_RTX;
2064
2065   src = gen_lowpart (src_int_mode, src);
2066   src = convert_modes (int_mode, src_int_mode, src, true);
2067   src = gen_lowpart (mode, src);
2068   return src;
2069 }
2070 \f
2071 /* Add INC into TARGET.  */
2072
2073 void
2074 expand_inc (rtx target, rtx inc)
2075 {
2076   rtx value = expand_binop (GET_MODE (target), add_optab,
2077                             target, inc,
2078                             target, 0, OPTAB_LIB_WIDEN);
2079   if (value != target)
2080     emit_move_insn (target, value);
2081 }
2082
2083 /* Subtract DEC from TARGET.  */
2084
2085 void
2086 expand_dec (rtx target, rtx dec)
2087 {
2088   rtx value = expand_binop (GET_MODE (target), sub_optab,
2089                             target, dec,
2090                             target, 0, OPTAB_LIB_WIDEN);
2091   if (value != target)
2092     emit_move_insn (target, value);
2093 }
2094 \f
2095 /* Output a shift instruction for expression code CODE,
2096    with SHIFTED being the rtx for the value to shift,
2097    and AMOUNT the rtx for the amount to shift by.
2098    Store the result in the rtx TARGET, if that is convenient.
2099    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2100    Return the rtx for where the value is.  */
2101
2102 static rtx
2103 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2104                 rtx amount, rtx target, int unsignedp)
2105 {
2106   rtx op1, temp = 0;
2107   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2108   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2109   optab lshift_optab = ashl_optab;
2110   optab rshift_arith_optab = ashr_optab;
2111   optab rshift_uns_optab = lshr_optab;
2112   optab lrotate_optab = rotl_optab;
2113   optab rrotate_optab = rotr_optab;
2114   machine_mode op1_mode;
2115   machine_mode scalar_mode = mode;
2116   int attempt;
2117   bool speed = optimize_insn_for_speed_p ();
2118
2119   if (VECTOR_MODE_P (mode))
2120     scalar_mode = GET_MODE_INNER (mode);
2121   op1 = amount;
2122   op1_mode = GET_MODE (op1);
2123
2124   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2125      shift amount is a vector, use the vector/vector shift patterns.  */
2126   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2127     {
2128       lshift_optab = vashl_optab;
2129       rshift_arith_optab = vashr_optab;
2130       rshift_uns_optab = vlshr_optab;
2131       lrotate_optab = vrotl_optab;
2132       rrotate_optab = vrotr_optab;
2133     }
2134
2135   /* Previously detected shift-counts computed by NEGATE_EXPR
2136      and shifted in the other direction; but that does not work
2137      on all machines.  */
2138
2139   if (SHIFT_COUNT_TRUNCATED)
2140     {
2141       if (CONST_INT_P (op1)
2142           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2143               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2144         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2145                        % GET_MODE_BITSIZE (scalar_mode));
2146       else if (GET_CODE (op1) == SUBREG
2147                && subreg_lowpart_p (op1)
2148                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2149                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2150         op1 = SUBREG_REG (op1);
2151     }
2152
2153   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2154      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2155      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2156      amount instead.  */
2157   if (rotate
2158       && CONST_INT_P (op1)
2159       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2160                    GET_MODE_BITSIZE (scalar_mode) - 1))
2161     {
2162       op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2163       left = !left;
2164       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2165     }
2166
2167   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2168      Note that this is not the case for bigger values.  For instance a rotation
2169      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2170      0x04030201 (bswapsi).  */
2171   if (rotate
2172       && CONST_INT_P (op1)
2173       && INTVAL (op1) == BITS_PER_UNIT
2174       && GET_MODE_SIZE (scalar_mode) == 2
2175       && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2176     return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2177                                   unsignedp);
2178
2179   if (op1 == const0_rtx)
2180     return shifted;
2181
2182   /* Check whether its cheaper to implement a left shift by a constant
2183      bit count by a sequence of additions.  */
2184   if (code == LSHIFT_EXPR
2185       && CONST_INT_P (op1)
2186       && INTVAL (op1) > 0
2187       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2188       && INTVAL (op1) < MAX_BITS_PER_WORD
2189       && (shift_cost (speed, mode, INTVAL (op1))
2190           > INTVAL (op1) * add_cost (speed, mode))
2191       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2192     {
2193       int i;
2194       for (i = 0; i < INTVAL (op1); i++)
2195         {
2196           temp = force_reg (mode, shifted);
2197           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2198                                   unsignedp, OPTAB_LIB_WIDEN);
2199         }
2200       return shifted;
2201     }
2202
2203   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2204     {
2205       enum optab_methods methods;
2206
2207       if (attempt == 0)
2208         methods = OPTAB_DIRECT;
2209       else if (attempt == 1)
2210         methods = OPTAB_WIDEN;
2211       else
2212         methods = OPTAB_LIB_WIDEN;
2213
2214       if (rotate)
2215         {
2216           /* Widening does not work for rotation.  */
2217           if (methods == OPTAB_WIDEN)
2218             continue;
2219           else if (methods == OPTAB_LIB_WIDEN)
2220             {
2221               /* If we have been unable to open-code this by a rotation,
2222                  do it as the IOR of two shifts.  I.e., to rotate A
2223                  by N bits, compute
2224                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2225                  where C is the bitsize of A.
2226
2227                  It is theoretically possible that the target machine might
2228                  not be able to perform either shift and hence we would
2229                  be making two libcalls rather than just the one for the
2230                  shift (similarly if IOR could not be done).  We will allow
2231                  this extremely unlikely lossage to avoid complicating the
2232                  code below.  */
2233
2234               rtx subtarget = target == shifted ? 0 : target;
2235               rtx new_amount, other_amount;
2236               rtx temp1;
2237
2238               new_amount = op1;
2239               if (op1 == const0_rtx)
2240                 return shifted;
2241               else if (CONST_INT_P (op1))
2242                 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2243                                         - INTVAL (op1));
2244               else
2245                 {
2246                   other_amount
2247                     = simplify_gen_unary (NEG, GET_MODE (op1),
2248                                           op1, GET_MODE (op1));
2249                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2250                   other_amount
2251                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2252                                            gen_int_mode (mask, GET_MODE (op1)));
2253                 }
2254
2255               shifted = force_reg (mode, shifted);
2256
2257               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2258                                      mode, shifted, new_amount, 0, 1);
2259               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2260                                       mode, shifted, other_amount,
2261                                       subtarget, 1);
2262               return expand_binop (mode, ior_optab, temp, temp1, target,
2263                                    unsignedp, methods);
2264             }
2265
2266           temp = expand_binop (mode,
2267                                left ? lrotate_optab : rrotate_optab,
2268                                shifted, op1, target, unsignedp, methods);
2269         }
2270       else if (unsignedp)
2271         temp = expand_binop (mode,
2272                              left ? lshift_optab : rshift_uns_optab,
2273                              shifted, op1, target, unsignedp, methods);
2274
2275       /* Do arithmetic shifts.
2276          Also, if we are going to widen the operand, we can just as well
2277          use an arithmetic right-shift instead of a logical one.  */
2278       if (temp == 0 && ! rotate
2279           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2280         {
2281           enum optab_methods methods1 = methods;
2282
2283           /* If trying to widen a log shift to an arithmetic shift,
2284              don't accept an arithmetic shift of the same size.  */
2285           if (unsignedp)
2286             methods1 = OPTAB_MUST_WIDEN;
2287
2288           /* Arithmetic shift */
2289
2290           temp = expand_binop (mode,
2291                                left ? lshift_optab : rshift_arith_optab,
2292                                shifted, op1, target, unsignedp, methods1);
2293         }
2294
2295       /* We used to try extzv here for logical right shifts, but that was
2296          only useful for one machine, the VAX, and caused poor code
2297          generation there for lshrdi3, so the code was deleted and a
2298          define_expand for lshrsi3 was added to vax.md.  */
2299     }
2300
2301   gcc_assert (temp);
2302   return temp;
2303 }
2304
2305 /* Expand a shift of SHIFTED, a value of mode MODE, by the constant
2306    bit count AMOUNT.  CODE is the tree code naming the operation.
2307    This is a thin wrapper that turns AMOUNT into an rtx constant and
2308    hands everything else to expand_shift_1 unchanged.  TARGET suggests
2309    where to put the result; a nonzero UNSIGNEDP asks for a logical
2310    shift, zero for an arithmetic one.  Returns the rtx of the result.  */
2311
2312 rtx
2313 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2314               int amount, rtx target, int unsignedp)
2315 {
2316   rtx amount_rtx = GEN_INT (amount);
2317   return expand_shift_1 (code, mode, shifted, amount_rtx, target, unsignedp);
2318 }
2319
2320 /* Like expand_shift, except that the shift count is given as the tree
2321    AMOUNT rather than as an integer.  AMOUNT is expanded to an rtx with
2322    expand_normal and then passed, together with CODE, MODE, SHIFTED,
2323    TARGET and UNSIGNEDP, to expand_shift_1.  TARGET is only a suggested
2324    destination; a nonzero UNSIGNEDP selects a logical shift, zero an
2325    arithmetic one.  The value returned is the rtx holding the result.  */
2326
2327 rtx
2328 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2329                        tree amount, rtx target, int unsignedp)
2330 {
2331   rtx amount_rtx = expand_normal (amount);
2332   return expand_shift_1 (code, mode, shifted, amount_rtx, target, unsignedp);
2333 }
2334
2335 \f
2336 /* The fixup to apply after a synthesized constant multiplication, as
2337    chosen by choose_mult_variant and applied by expand_mult_const:
2338    BASIC_VARIANT means no fixup, NEGATE_VARIANT means negate the result,
2339    ADD_VARIANT means add the multiplicand to the result once more.  */
2340 enum mult_variant {basic_variant, negate_variant, add_variant};
2341 /* Prototypes for file-local (static) helper functions.  */
2342 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2343                         const struct mult_cost *, machine_mode mode);
2344 static bool choose_mult_variant (machine_mode, HOST_WIDE_INT,
2345                                  struct algorithm *, enum mult_variant *, int);
2346 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2347                               const struct algorithm *, enum mult_variant);
2348 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2349 static rtx extract_high_half (machine_mode, rtx);
2350 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2351 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2352                                        int, int);
2353 /* Find the cheapest shift/add/subtract sequence for multiplying by T
2354    in mode MODE and store it in *ALG_OUT.  The sequence must cost less
2355    than *COST_LIMIT.  If none is found, ALG_OUT->cost is left greater
2356    than *COST_LIMIT and all other fields of *ALG_OUT are undefined.
2357    Results are cached in a hash table and reused by later calls.  */
2358
2359 static void
2360 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2361             const struct mult_cost *cost_limit, machine_mode mode)
2362 {
2363   int m;
2364   struct algorithm *alg_in, *best_alg;
2365   struct mult_cost best_cost;
2366   struct mult_cost new_limit;
2367   int op_cost, op_latency;
2368   unsigned HOST_WIDE_INT orig_t = t;
2369   unsigned HOST_WIDE_INT q;
2370   int maxm, hash_index;
2371   bool cache_hit = false;
2372   enum alg_code cache_alg = alg_zero;
2373   bool speed = optimize_insn_for_speed_p ();
2374   machine_mode imode;
2375   struct alg_hash_entry *entry_ptr;
2376
2377   /* Indicate that no algorithm is yet found.  If no algorithm
2378      is found, this value will be returned and indicate failure.  */
2379   alg_out->cost.cost = cost_limit->cost + 1;
2380   alg_out->cost.latency = cost_limit->latency + 1;
2381
2382   if (cost_limit->cost < 0
2383       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2384     return;
2385
2386   /* Be prepared for vector modes.  */
2387   imode = GET_MODE_INNER (mode);
2388   if (imode == VOIDmode)
2389     imode = mode;
2390   /* Shift counts considered below are always smaller than MAXM.  */
2391   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2392
2393   /* Restrict the bits of "t" to the multiplication's mode.  */
2394   t &= GET_MODE_MASK (imode);
2395
2396   /* t == 1 can be done in zero cost.  */
2397   if (t == 1)
2398     {
2399       alg_out->ops = 1;
2400       alg_out->cost.cost = 0;
2401       alg_out->cost.latency = 0;
2402       alg_out->op[0] = alg_m;
2403       return;
2404     }
2405
2406   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2407      fail now.  */
2408   if (t == 0)
2409     {
2410       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2411         return;
2412       else
2413         {
2414           alg_out->ops = 1;
2415           alg_out->cost.cost = zero_cost (speed);
2416           alg_out->cost.latency = zero_cost (speed);
2417           alg_out->op[0] = alg_zero;
2418           return;
2419         }
2420     }
2421
2422   /* We'll be needing a couple extra algorithm structures now.  ALG_IN
2423      receives each candidate; BEST_ALG holds the cheapest one so far.  */
2424   alg_in = XALLOCA (struct algorithm);
2425   best_alg = XALLOCA (struct algorithm);
2426   best_cost = *cost_limit;
2427
2428   /* Compute the hash index.  */
2429   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2430
2431   /* See if we already know what to do for T.  An entry is usable only
2432      if it was recorded for this same combination of T, MODE and SPEED.  */
2433   entry_ptr = alg_hash_entry_ptr (hash_index);
2434   if (entry_ptr->t == t
2435       && entry_ptr->mode == mode
2436       && entry_ptr->speed == speed
2437       && entry_ptr->alg != alg_unknown)
2438     {
2439       cache_alg = entry_ptr->alg;
2440
2441       if (cache_alg == alg_impossible)
2442         {
2443           /* The cache tells us that it's impossible to synthesize
2444              multiplication by T within entry_ptr->cost.  */
2445           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2446             /* COST_LIMIT is at least as restrictive as the one
2447                recorded in the hash table, in which case we have no
2448                hope of synthesizing a multiplication.  Just
2449                return.  */
2450             return;
2451
2452           /* If we get here, COST_LIMIT is less restrictive than the
2453              one recorded in the hash table, so we may be able to
2454              synthesize a multiplication.  Proceed as if we didn't
2455              have the cache entry.  */
2456         }
2457       else
2458         {
2459           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2460             /* The cached algorithm shows that this multiplication
2461                requires more cost than COST_LIMIT.  Just return.  This
2462                way, we don't clobber this cache entry with
2463                alg_impossible but retain useful information.  */
2464             return;
2465
2466           cache_hit = true;
2467
2468           switch (cache_alg)
2469             {
2470             case alg_shift:
2471               goto do_alg_shift;
2472
2473             case alg_add_t_m2:
2474             case alg_sub_t_m2:
2475               goto do_alg_addsub_t_m2;
2476
2477             case alg_add_factor:
2478             case alg_sub_factor:
2479               goto do_alg_addsub_factor;
2480
2481             case alg_add_t2_m:
2482               goto do_alg_add_t2_m;
2483
2484             case alg_sub_t2_m:
2485               goto do_alg_sub_t2_m;
2486
2487             default:
2488               gcc_unreachable ();
2489             }
2490         }
2491     }
2492
2493   /* If we have a group of zero bits at the low-order part of T, try
2494      multiplying by the remaining bits and then doing a shift.  */
2495
2496   if ((t & 1) == 0)
2497     {
2498     do_alg_shift:
2499       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2500       if (m < maxm)
2501         {
2502           q = t >> m;
2503           /* The function expand_shift will choose between a shift and
2504              a sequence of additions, so the observed cost is given as
2505              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2506           op_cost = m * add_cost (speed, mode);
2507           if (shift_cost (speed, mode, m) < op_cost)
2508             op_cost = shift_cost (speed, mode, m);
2509           new_limit.cost = best_cost.cost - op_cost;
2510           new_limit.latency = best_cost.latency - op_cost;
2511           synth_mult (alg_in, q, &new_limit, mode);
2512
2513           alg_in->cost.cost += op_cost;
2514           alg_in->cost.latency += op_cost;
2515           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2516             {
2517               struct algorithm *x;
2518               best_cost = alg_in->cost;
2519               x = alg_in, alg_in = best_alg, best_alg = x;
2520               best_alg->log[best_alg->ops] = m;
2521               best_alg->op[best_alg->ops] = alg_shift;
2522             }
2523
2524           /* See if treating ORIG_T as a signed number yields a better
2525              sequence.  Try this sequence only for a negative ORIG_T
2526              as it would be useless for a non-negative ORIG_T.  */
2527           if ((HOST_WIDE_INT) orig_t < 0)
2528             {
2529               /* Shift ORIG_T as follows because a right shift of a
2530                  negative-valued signed type is implementation
2531                  defined.  */
2532               q = ~(~orig_t >> m);
2533               /* The function expand_shift will choose between a shift
2534                  and a sequence of additions, so the observed cost is
2535                  given as MIN (m * add_cost(speed, mode),
2536                  shift_cost(speed, mode, m)).  */
2537               op_cost = m * add_cost (speed, mode);
2538               if (shift_cost (speed, mode, m) < op_cost)
2539                 op_cost = shift_cost (speed, mode, m);
2540               new_limit.cost = best_cost.cost - op_cost;
2541               new_limit.latency = best_cost.latency - op_cost;
2542               synth_mult (alg_in, q, &new_limit, mode);
2543
2544               alg_in->cost.cost += op_cost;
2545               alg_in->cost.latency += op_cost;
2546               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2547                 {
2548                   struct algorithm *x;
2549                   best_cost = alg_in->cost;
2550                   x = alg_in, alg_in = best_alg, best_alg = x;
2551                   best_alg->log[best_alg->ops] = m;
2552                   best_alg->op[best_alg->ops] = alg_shift;
2553                 }
2554             }
2555         }
2556       if (cache_hit)
2557         goto done;
2558     }
2559
2560   /* If we have an odd number, add or subtract one.  */
2561   if ((t & 1) != 0)
2562     {
2563       unsigned HOST_WIDE_INT w;
2564
2565     do_alg_addsub_t_m2:
2566       for (w = 1; (w & t) != 0; w <<= 1)
2567         ;
2568       /* If T was -1, then W will be zero after the loop.  This is another
2569          case where T ends with ...111.  Handling this with (T + 1) and
2570          subtract 1 produces slightly better code and results in algorithm
2571          selection much faster than treating it like the ...0111 case
2572          below.  */
2573       if (w == 0
2574           || (w > 2
2575               /* Reject the case where t is 3.
2576                  Thus we prefer addition in that case.  */
2577               && t != 3))
2578         {
2579           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2580
2581           op_cost = add_cost (speed, mode);
2582           new_limit.cost = best_cost.cost - op_cost;
2583           new_limit.latency = best_cost.latency - op_cost;
2584           synth_mult (alg_in, t + 1, &new_limit, mode);
2585
2586           alg_in->cost.cost += op_cost;
2587           alg_in->cost.latency += op_cost;
2588           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2589             {
2590               struct algorithm *x;
2591               best_cost = alg_in->cost;
2592               x = alg_in, alg_in = best_alg, best_alg = x;
2593               best_alg->log[best_alg->ops] = 0;
2594               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2595             }
2596         }
2597       else
2598         {
2599           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2600
2601           op_cost = add_cost (speed, mode);
2602           new_limit.cost = best_cost.cost - op_cost;
2603           new_limit.latency = best_cost.latency - op_cost;
2604           synth_mult (alg_in, t - 1, &new_limit, mode);
2605
2606           alg_in->cost.cost += op_cost;
2607           alg_in->cost.latency += op_cost;
2608           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2609             {
2610               struct algorithm *x;
2611               best_cost = alg_in->cost;
2612               x = alg_in, alg_in = best_alg, best_alg = x;
2613               best_alg->log[best_alg->ops] = 0;
2614               best_alg->op[best_alg->ops] = alg_add_t_m2;
2615             }
2616         }
2617
2618       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2619          quickly with a - a * n for some appropriate constant n.  */
2620       m = exact_log2 (-orig_t + 1);
2621       if (m >= 0 && m < maxm)
2622         {
2623           op_cost = shiftsub1_cost (speed, mode, m);
2624           new_limit.cost = best_cost.cost - op_cost;
2625           new_limit.latency = best_cost.latency - op_cost;
2626           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2627                       &new_limit, mode);
2628
2629           alg_in->cost.cost += op_cost;
2630           alg_in->cost.latency += op_cost;
2631           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2632             {
2633               struct algorithm *x;
2634               best_cost = alg_in->cost;
2635               x = alg_in, alg_in = best_alg, best_alg = x;
2636               best_alg->log[best_alg->ops] = m;
2637               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2638             }
2639         }
2640
2641       if (cache_hit)
2642         goto done;
2643     }
2644
2645   /* Look for factors of t of the form
2646      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2647      If we find such a factor, we can multiply by t using an algorithm that
2648      multiplies by q, shift the result by m and add/subtract it to itself.
2649
2650      We search for large factors first and loop down, even if large factors
2651      are less probable than small; if we find a large factor we will find a
2652      good sequence quickly, and therefore be able to prune (by decreasing
2653      COST_LIMIT) the search.  */
2654
2655  do_alg_addsub_factor:
2656   for (m = floor_log2 (t - 1); m >= 2; m--)
2657     {
2658       unsigned HOST_WIDE_INT d;
2659
2660       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2661       if (t % d == 0 && t > d && m < maxm
2662           && (!cache_hit || cache_alg == alg_add_factor))
2663         {
2664           /* If the target has a cheap shift-and-add instruction use
2665              that in preference to a shift insn followed by an add insn.
2666              Assume that the shift-and-add is "atomic" with a latency
2667              equal to its cost, otherwise assume that on superscalar
2668              hardware the shift may be executed concurrently with the
2669              earlier steps in the algorithm.  */
2670           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2671           if (shiftadd_cost (speed, mode, m) < op_cost)
2672             {
2673               op_cost = shiftadd_cost (speed, mode, m);
2674               op_latency = op_cost;
2675             }
2676           else
2677             op_latency = add_cost (speed, mode);
2678
2679           new_limit.cost = best_cost.cost - op_cost;
2680           new_limit.latency = best_cost.latency - op_latency;
2681           synth_mult (alg_in, t / d, &new_limit, mode);
2682
2683           alg_in->cost.cost += op_cost;
2684           alg_in->cost.latency += op_latency;
2685           if (alg_in->cost.latency < op_cost)
2686             alg_in->cost.latency = op_cost;
2687           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2688             {
2689               struct algorithm *x;
2690               best_cost = alg_in->cost;
2691               x = alg_in, alg_in = best_alg, best_alg = x;
2692               best_alg->log[best_alg->ops] = m;
2693               best_alg->op[best_alg->ops] = alg_add_factor;
2694             }
2695           /* Other factors will have been taken care of in the recursion.  */
2696           break;
2697         }
2698
2699       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2700       if (t % d == 0 && t > d && m < maxm
2701           && (!cache_hit || cache_alg == alg_sub_factor))
2702         {
2703           /* If the target has a cheap shift-and-subtract insn use
2704              that in preference to a shift insn followed by a sub insn.
2705              Assume that the shift-and-sub is "atomic" with a latency
2706              equal to its cost, otherwise assume that on superscalar
2707              hardware the shift may be executed concurrently with the
2708              earlier steps in the algorithm.  */
2709           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2710           if (shiftsub0_cost (speed, mode, m) < op_cost)
2711             {
2712               op_cost = shiftsub0_cost (speed, mode, m);
2713               op_latency = op_cost;
2714             }
2715           else
2716             op_latency = add_cost (speed, mode);
2717
2718           new_limit.cost = best_cost.cost - op_cost;
2719           new_limit.latency = best_cost.latency - op_latency;
2720           synth_mult (alg_in, t / d, &new_limit, mode);
2721
2722           alg_in->cost.cost += op_cost;
2723           alg_in->cost.latency += op_latency;
2724           if (alg_in->cost.latency < op_cost)
2725             alg_in->cost.latency = op_cost;
2726           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2727             {
2728               struct algorithm *x;
2729               best_cost = alg_in->cost;
2730               x = alg_in, alg_in = best_alg, best_alg = x;
2731               best_alg->log[best_alg->ops] = m;
2732               best_alg->op[best_alg->ops] = alg_sub_factor;
2733             }
2734           break;
2735         }
2736     }
2737   if (cache_hit)
2738     goto done;
2739
2740   /* Try shift-and-add (load effective address) instructions,
2741      i.e. do a*3, a*5, a*9.  */
2742   if ((t & 1) != 0)
2743     {
2744     do_alg_add_t2_m:
2745       q = t - 1;
2746       q = q & -q;
2747       m = exact_log2 (q);
2748       if (m >= 0 && m < maxm)
2749         {
2750           op_cost = shiftadd_cost (speed, mode, m);
2751           new_limit.cost = best_cost.cost - op_cost;
2752           new_limit.latency = best_cost.latency - op_cost;
2753           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2754
2755           alg_in->cost.cost += op_cost;
2756           alg_in->cost.latency += op_cost;
2757           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2758             {
2759               struct algorithm *x;
2760               best_cost = alg_in->cost;
2761               x = alg_in, alg_in = best_alg, best_alg = x;
2762               best_alg->log[best_alg->ops] = m;
2763               best_alg->op[best_alg->ops] = alg_add_t2_m;
2764             }
2765         }
2766       if (cache_hit)
2767         goto done;
2768
2769     do_alg_sub_t2_m:
2770       q = t + 1;
2771       q = q & -q;
2772       m = exact_log2 (q);
2773       if (m >= 0 && m < maxm)
2774         {
2775           op_cost = shiftsub0_cost (speed, mode, m);
2776           new_limit.cost = best_cost.cost - op_cost;
2777           new_limit.latency = best_cost.latency - op_cost;
2778           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2779
2780           alg_in->cost.cost += op_cost;
2781           alg_in->cost.latency += op_cost;
2782           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2783             {
2784               struct algorithm *x;
2785               best_cost = alg_in->cost;
2786               x = alg_in, alg_in = best_alg, best_alg = x;
2787               best_alg->log[best_alg->ops] = m;
2788               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2789             }
2790         }
2791       if (cache_hit)
2792         goto done;
2793     }
2794
2795  done:
2796   /* If best_cost has not decreased, we have not found any algorithm.  */
2797   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2798     {
2799       /* We failed to find an algorithm.  Record alg_impossible for
2800          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2801          we are asked to find an algorithm for T within the same or
2802          lower COST_LIMIT, we can immediately return to the
2803          caller.  */
2804       entry_ptr->t = t;
2805       entry_ptr->mode = mode;
2806       entry_ptr->speed = speed;
2807       entry_ptr->alg = alg_impossible;
2808       entry_ptr->cost = *cost_limit;
2809       return;
2810     }
2811
2812   /* Cache the result: the last operation of the best sequence and its cost.  */
2813   if (!cache_hit)
2814     {
2815       entry_ptr->t = t;
2816       entry_ptr->mode = mode;
2817       entry_ptr->speed = speed;
2818       entry_ptr->alg = best_alg->op[best_alg->ops];
2819       entry_ptr->cost.cost = best_cost.cost;
2820       entry_ptr->cost.latency = best_cost.latency;
2821     }
2822
2823   /* If we are getting a too long sequence for `struct algorithm'
2824      to record, make this search fail.  */
2825   if (best_alg->ops == MAX_BITS_PER_WORD)
2826     return;
2827
2828   /* Copy the algorithm from temporary space to the space at alg_out.
2829      We avoid using structure assignment because the majority of
2830      best_alg is normally undefined, and this is a critical function.  */
2831   alg_out->ops = best_alg->ops + 1;
2832   alg_out->cost = best_cost;
2833   memcpy (alg_out->op, best_alg->op,
2834           alg_out->ops * sizeof *alg_out->op);
2835   memcpy (alg_out->log, best_alg->log,
2836           alg_out->ops * sizeof *alg_out->log);
2837 }
2838 \f
2839 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2840    Try three variations:
2841
2842        - a shift/add sequence based on VAL itself
2843        - a shift/add sequence based on -VAL, followed by a negation
2844        - a shift/add sequence based on VAL - 1, followed by an addition.
2845
2846    Return true if the cheapest of these costs less than MULT_COST,
2847    describing the algorithm in *ALG and the final fixup in *VARIANT.  */
2848
2849 static bool
2850 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2851                      struct algorithm *alg, enum mult_variant *variant,
2852                      int mult_cost)
2853 {
2854   struct algorithm alg2;
2855   struct mult_cost limit;
2856   int op_cost;
2857   bool speed = optimize_insn_for_speed_p ();
2858
2859   /* Fail quickly for impossible bounds.  */
2860   if (mult_cost < 0)
2861     return false;
2862
2863   /* Ensure that mult_cost provides a reasonable upper bound.
2864      Any constant multiplication can be performed with less
2865      than 2 * bits additions.  */
2866   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2867   if (mult_cost > op_cost)
2868     mult_cost = op_cost;
2869
2870   *variant = basic_variant;
2871   limit.cost = mult_cost;
2872   limit.latency = mult_cost;
2873   synth_mult (alg, val, &limit, mode);
2874
2875   /* Try -VAL followed by a negation.  This works only if the inverted
2876      value actually fits in an `unsigned int'.  */
2877   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2878     {
2879       op_cost = neg_cost (speed, mode);
2880       if (MULT_COST_LESS (&alg->cost, mult_cost))
2881         {
2882           limit.cost = alg->cost.cost - op_cost;
2883           limit.latency = alg->cost.latency - op_cost;
2884         }
2885       else
2886         {
2887           limit.cost = mult_cost - op_cost;
2888           limit.latency = mult_cost - op_cost;
2889         }
2890       /* Negate as unsigned so that the most negative VAL cannot overflow.  */
2891       synth_mult (&alg2, -(unsigned HOST_WIDE_INT) val, &limit, mode);
2892       alg2.cost.cost += op_cost;
2893       alg2.cost.latency += op_cost;
2894       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2895         *alg = alg2, *variant = negate_variant;
2896     }
2897
2898   /* This proves very useful for division-by-constant.  */
2899   op_cost = add_cost (speed, mode);
2900   if (MULT_COST_LESS (&alg->cost, mult_cost))
2901     {
2902       limit.cost = alg->cost.cost - op_cost;
2903       limit.latency = alg->cost.latency - op_cost;
2904     }
2905   else
2906     {
2907       limit.cost = mult_cost - op_cost;
2908       limit.latency = mult_cost - op_cost;
2909     }
2910   /* Form VAL - 1 in unsigned arithmetic so that it cannot overflow.  */
2911   synth_mult (&alg2, (unsigned HOST_WIDE_INT) val - 1, &limit, mode);
2912   alg2.cost.cost += op_cost;
2913   alg2.cost.latency += op_cost;
2914   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2915     *alg = alg2, *variant = add_variant;
2916
2917   return MULT_COST_LESS (&alg->cost, mult_cost);
2918 }
2919
2920 /* A subroutine of expand_mult, used for constant multiplications.
2921    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2922    convenient.  Use the shift/add sequence described by ALG and apply
2923    the final fixup specified by VARIANT.  Return the rtx of the result.  */
2924
2925 static rtx
2926 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
2927                    rtx target, const struct algorithm *alg,
2928                    enum mult_variant variant)
2929 {
2930   /* The multiplier synthesized so far.  Kept unsigned so that the shifts,
2931      additions and negation below wrap instead of overflowing.  */
2932   unsigned HOST_WIDE_INT val_so_far;
2933   rtx_insn *insn;
2934   rtx accum, tem;
2935   int opno;
2936   machine_mode nmode;
2937
2938   /* Avoid referencing memory over and over and invalid sharing on SUBREGs.  */
2939   op0 = force_reg (mode, op0);
2940
2941   /* ACCUM starts out either as OP0 or as a zero, depending on
2942      the first operation.  */
2943   if (alg->op[0] == alg_zero)
2944     {
2945       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2946       val_so_far = 0;
2947     }
2948   else if (alg->op[0] == alg_m)
2949     {
2950       accum = copy_to_mode_reg (mode, op0);
2951       val_so_far = 1;
2952     }
2953   else
2954     gcc_unreachable ();
2955
2956   for (opno = 1; opno < alg->ops; opno++)
2957     {
2958       int log = alg->log[opno];
2959       rtx shift_subtarget = optimize ? 0 : accum;
2960       rtx add_target
2961         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2962            && !optimize)
2963           ? target : 0;
2964       rtx accum_target = optimize ? 0 : accum;
2965       rtx accum_inner;
2966
2967       switch (alg->op[opno])
2968         {
2969         case alg_shift:  /* accum = accum << log  */
2970           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2971           /* REG_EQUAL note will be attached to the following insn.  */
2972           emit_move_insn (accum, tem);
2973           val_so_far <<= log;
2974           break;
2975
2976         case alg_add_t_m2:  /* accum = accum + (op0 << log)  */
2977           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2978           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2979                                  add_target ? add_target : accum_target);
2980           val_so_far += (unsigned HOST_WIDE_INT) 1 << log;
2981           break;
2982
2983         case alg_sub_t_m2:  /* accum = accum - (op0 << log)  */
2984           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2985           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2986                                  add_target ? add_target : accum_target);
2987           val_so_far -= (unsigned HOST_WIDE_INT) 1 << log;
2988           break;
2989
2990         case alg_add_t2_m:  /* accum = (accum << log) + op0  */
2991           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2992                                 log, shift_subtarget, 0);
2993           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2994                                  add_target ? add_target : accum_target);
2995           val_so_far = (val_so_far << log) + 1;
2996           break;
2997
2998         case alg_sub_t2_m:  /* accum = (accum << log) - op0  */
2999           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3000                                 log, shift_subtarget, 0);
3001           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3002                                  add_target ? add_target : accum_target);
3003           val_so_far = (val_so_far << log) - 1;
3004           break;
3005
3006         case alg_add_factor:  /* accum = accum + (accum << log)  */
3007           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3008           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3009                                  add_target ? add_target : accum_target);
3010           val_so_far += val_so_far << log;
3011           break;
3012
3013         case alg_sub_factor:  /* accum = (accum << log) - accum  */
3014           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3015           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3016                                  (add_target
3017                                   ? add_target : (optimize ? 0 : tem)));
3018           val_so_far = (val_so_far << log) - val_so_far;
3019           break;
3020
3021         default:
3022           gcc_unreachable ();
3023         }
3024
3025       if (SCALAR_INT_MODE_P (mode))
3026         {
3027           /* Write a REG_EQUAL note on the last insn so that we can cse
3028              multiplication sequences.  Note that if ACCUM is a SUBREG,
3029              we've set the inner register and must properly indicate that.  */
3030           tem = op0, nmode = mode;
3031           accum_inner = accum;
3032           if (GET_CODE (accum) == SUBREG)
3033             {
3034               accum_inner = SUBREG_REG (accum);
3035               nmode = GET_MODE (accum_inner);
3036               tem = gen_lowpart (nmode, op0);
3037             }
3038
3039           insn = get_last_insn ();
3040           set_dst_reg_note (insn, REG_EQUAL,
3041                             gen_rtx_MULT (nmode, tem,
3042                                           gen_int_mode (val_so_far, nmode)),
3043                             accum_inner);
3044         }
3045     }
3046
3047   if (variant == negate_variant)
3048     {
3049       val_so_far = -val_so_far;
3050       accum = expand_unop (mode, neg_optab, accum, target, 0);
3051     }
3052   else if (variant == add_variant)
3053     {
3054       val_so_far = val_so_far + 1;
3055       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3056     }
3057
3058   /* Compare only the bits of val and val_so_far that are significant
3059      in the result mode, to avoid sign-/zero-extension confusion.  */
3060   nmode = GET_MODE_INNER (mode);
3061   if (nmode == VOIDmode)
3062     nmode = mode;
3063   val &= GET_MODE_MASK (nmode);
3064   val_so_far &= GET_MODE_MASK (nmode);
3065   gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3066
3067   return accum;
3068 }
3069
3070 /* Perform a multiplication and return an rtx for the result.
3071    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3072    TARGET is a suggestion for where to store the result (an rtx).
3073
3074    We check specially for a constant integer as OP1.
3075    If you want this check for OP0 as well, then before calling
3076    you should swap the two operands if OP0 would be constant.  */
3077
3078 rtx
3079 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3080              int unsignedp)
3081 {
3082   enum mult_variant variant;
3083   struct algorithm algorithm;
3084   rtx scalar_op1;
3085   int max_cost;
3086   bool speed = optimize_insn_for_speed_p ();
3087   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3088
3089   if (CONSTANT_P (op0))
3090     std::swap (op0, op1);
3091
3092   /* For vectors, there are several simplifications that can be made if
3093      all elements of the vector constant are identical.  */
3094   scalar_op1 = op1;
3095   if (GET_CODE (op1) == CONST_VECTOR)
3096     {
3097       int i, n = CONST_VECTOR_NUNITS (op1);
3098       scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3099       for (i = 1; i < n; ++i)
3100         if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3101           goto skip_scalar;
3102     }
3103
3104   if (INTEGRAL_MODE_P (mode))
3105     {
3106       rtx fake_reg;
3107       HOST_WIDE_INT coeff;
3108       bool is_neg;
3109       int mode_bitsize;
3110
3111       if (op1 == CONST0_RTX (mode))
3112         return op1;
3113       if (op1 == CONST1_RTX (mode))
3114         return op0;
3115       if (op1 == CONSTM1_RTX (mode))
3116         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3117                             op0, target, 0);
3118
3119       if (do_trapv)
3120         goto skip_synth;
3121
3122       /* If mode is integer vector mode, check if the backend supports
3123          vector lshift (by scalar or vector) at all.  If not, we can't use
3124          synthetized multiply.  */
3125       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3126           && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3127           && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3128         goto skip_synth;
3129
3130       /* These are the operations that are potentially turned into
3131          a sequence of shifts and additions.  */
3132       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3133
3134       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3135          less than or equal in size to `unsigned int' this doesn't matter.
3136          If the mode is larger than `unsigned int', then synth_mult works
3137          only if the constant value exactly fits in an `unsigned int' without
3138          any truncation.  This means that multiplying by negative values does
3139          not work; results are off by 2^32 on a 32 bit machine.  */
3140       if (CONST_INT_P (scalar_op1))
3141         {
3142           coeff = INTVAL (scalar_op1);
3143           is_neg = coeff < 0;
3144         }
3145 #if TARGET_SUPPORTS_WIDE_INT
3146       else if (CONST_WIDE_INT_P (scalar_op1))
3147 #else
3148       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3149 #endif
3150         {
3151           int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3152           /* Perfect power of 2 (other than 1, which is handled above).  */
3153           if (shift > 0)
3154             return expand_shift (LSHIFT_EXPR, mode, op0,
3155                                  shift, target, unsignedp);
3156           else
3157             goto skip_synth;
3158         }
3159       else
3160         goto skip_synth;
3161
3162       /* We used to test optimize here, on the grounds that it's better to
3163          produce a smaller program when -O is not used.  But this causes
3164          such a terrible slowdown sometimes that it seems better to always
3165          use synth_mult.  */
3166
3167       /* Special case powers of two.  */
3168       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3169           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3170         return expand_shift (LSHIFT_EXPR, mode, op0,
3171                              floor_log2 (coeff), target, unsignedp);
3172
3173       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3174
3175       /* Attempt to handle multiplication of DImode values by negative
3176          coefficients, by performing the multiplication by a positive
3177          multiplier and then inverting the result.  */
3178       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3179         {
3180           /* Its safe to use -coeff even for INT_MIN, as the
3181              result is interpreted as an unsigned coefficient.
3182              Exclude cost of op0 from max_cost to match the cost
3183              calculation of the synth_mult.  */
3184           coeff = -(unsigned HOST_WIDE_INT) coeff;
3185           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3186                       - neg_cost (speed, mode));
3187           if (max_cost <= 0)
3188             goto skip_synth;
3189
3190           /* Special case powers of two.  */
3191           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3192             {
3193               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3194                                        floor_log2 (coeff), target, unsignedp);
3195               return expand_unop (mode, neg_optab, temp, target, 0);
3196             }
3197
3198           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3199                                    max_cost))
3200             {
3201               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3202                                             &algorithm, variant);
3203               return expand_unop (mode, neg_optab, temp, target, 0);
3204             }
3205           goto skip_synth;
3206         }
3207
3208       /* Exclude cost of op0 from max_cost to match the cost
3209          calculation of the synth_mult.  */
3210       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3211       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3212         return expand_mult_const (mode, op0, coeff, target,
3213                                   &algorithm, variant);
3214     }
3215  skip_synth:
3216
3217   /* Expand x*2.0 as x+x.  */
3218   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3219     {
3220       REAL_VALUE_TYPE d;
3221       REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3222
3223       if (REAL_VALUES_EQUAL (d, dconst2))
3224         {
3225           op0 = force_reg (GET_MODE (op0), op0);
3226           return expand_binop (mode, add_optab, op0, op0,
3227                                target, unsignedp, OPTAB_LIB_WIDEN);
3228         }
3229     }
3230  skip_scalar:
3231
3232   /* This used to use umul_optab if unsigned, but for non-widening multiply
3233      there is no difference between signed and unsigned.  */
3234   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3235                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3236   gcc_assert (op0);
3237   return op0;
3238 }
3239
3240 /* Return a cost estimate for multiplying a register by the given
3241    COEFFicient in the given MODE and SPEED.  */
3242
3243 int
3244 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3245 {
3246   int max_cost;
3247   struct algorithm algorithm;
3248   enum mult_variant variant;
3249
3250   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3251   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3252   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3253     return algorithm.cost.cost;
3254   else
3255     return max_cost;
3256 }
3257
3258 /* Perform a widening multiplication and return an rtx for the result.
3259    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3260    TARGET is a suggestion for where to store the result (an rtx).
3261    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3262    or smul_widen_optab.
3263
3264    We check specially for a constant integer as OP1, comparing the
3265    cost of a widening multiply against the cost of a sequence of shifts
3266    and adds.  */
3267
3268 rtx
3269 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3270                       int unsignedp, optab this_optab)
3271 {
3272   bool speed = optimize_insn_for_speed_p ();
3273   rtx cop1;
3274
3275   if (CONST_INT_P (op1)
3276       && GET_MODE (op0) != VOIDmode
3277       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3278                                 this_optab == umul_widen_optab))
3279       && CONST_INT_P (cop1)
3280       && (INTVAL (cop1) >= 0
3281           || HWI_COMPUTABLE_MODE_P (mode)))
3282     {
3283       HOST_WIDE_INT coeff = INTVAL (cop1);
3284       int max_cost;
3285       enum mult_variant variant;
3286       struct algorithm algorithm;
3287
3288       if (coeff == 0)
3289         return CONST0_RTX (mode);
3290
3291       /* Special case powers of two.  */
3292       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3293         {
3294           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3295           return expand_shift (LSHIFT_EXPR, mode, op0,
3296                                floor_log2 (coeff), target, unsignedp);
3297         }
3298
3299       /* Exclude cost of op0 from max_cost to match the cost
3300          calculation of the synth_mult.  */
3301       max_cost = mul_widen_cost (speed, mode);
3302       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3303                                max_cost))
3304         {
3305           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3306           return expand_mult_const (mode, op0, coeff, target,
3307                                     &algorithm, variant);
3308         }
3309     }
3310   return expand_binop (mode, this_optab, op0, op1, target,
3311                        unsignedp, OPTAB_LIB_WIDEN);
3312 }
3313 \f
3314 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3315    replace division by D, and put the least significant N bits of the result
3316    in *MULTIPLIER_PTR and return the most significant bit.
3317
3318    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3319    needed precision is in PRECISION (should be <= N).
3320
3321    PRECISION should be as small as possible so this function can choose
3322    multiplier more freely.
3323
3324    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3325    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3326
3327    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3328    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3329
3330 unsigned HOST_WIDE_INT
3331 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3332                    unsigned HOST_WIDE_INT *multiplier_ptr,
3333                    int *post_shift_ptr, int *lgup_ptr)
3334 {
3335   int lgup, post_shift;
3336   int pow, pow2;
3337
3338   /* lgup = ceil(log2(divisor)); */
3339   lgup = ceil_log2 (d);
3340
3341   gcc_assert (lgup <= n);
3342
3343   pow = n + lgup;
3344   pow2 = n + lgup - precision;
3345
3346   /* mlow = 2^(N + lgup)/d */
3347   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3348   wide_int mlow = wi::udiv_trunc (val, d);
3349
3350   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3351   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3352   wide_int mhigh = wi::udiv_trunc (val, d);
3353
3354   /* If precision == N, then mlow, mhigh exceed 2^N
3355      (but they do not exceed 2^(N+1)).  */
3356
3357   /* Reduce to lowest terms.  */
3358   for (post_shift = lgup; post_shift > 0; post_shift--)
3359     {
3360       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3361                                                        HOST_BITS_PER_WIDE_INT);
3362       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3363                                                        HOST_BITS_PER_WIDE_INT);
3364       if (ml_lo >= mh_lo)
3365         break;
3366
3367       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3368       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3369     }
3370
3371   *post_shift_ptr = post_shift;
3372   *lgup_ptr = lgup;
3373   if (n < HOST_BITS_PER_WIDE_INT)
3374     {
3375       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3376       *multiplier_ptr = mhigh.to_uhwi () & mask;
3377       return mhigh.to_uhwi () >= mask;
3378     }
3379   else
3380     {
3381       *multiplier_ptr = mhigh.to_uhwi ();
3382       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3383     }
3384 }
3385
3386 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3387    congruent to 1 (mod 2**N).  */
3388
3389 static unsigned HOST_WIDE_INT
3390 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3391 {
3392   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3393
3394   /* The algorithm notes that the choice y = x satisfies
3395      x*y == 1 mod 2^3, since x is assumed odd.
3396      Each iteration doubles the number of bits of significance in y.  */
3397
3398   unsigned HOST_WIDE_INT mask;
3399   unsigned HOST_WIDE_INT y = x;
3400   int nbit = 3;
3401
3402   mask = (n == HOST_BITS_PER_WIDE_INT
3403           ? ~(unsigned HOST_WIDE_INT) 0
3404           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3405
3406   while (nbit < n)
3407     {
3408       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3409       nbit *= 2;
3410     }
3411   return y;
3412 }
3413
3414 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3415    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3416    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3417    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3418    become signed.
3419
3420    The result is put in TARGET if that is convenient.
3421
3422    MODE is the mode of operation.  */
3423
3424 rtx
3425 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3426                              rtx op1, rtx target, int unsignedp)
3427 {
3428   rtx tem;
3429   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3430
3431   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3432                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3433   tem = expand_and (mode, tem, op1, NULL_RTX);
3434   adj_operand
3435     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3436                      adj_operand);
3437
3438   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3439                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3440   tem = expand_and (mode, tem, op0, NULL_RTX);
3441   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3442                           target);
3443
3444   return target;
3445 }
3446
3447 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3448
3449 static rtx
3450 extract_high_half (machine_mode mode, rtx op)
3451 {
3452   machine_mode wider_mode;
3453
3454   if (mode == word_mode)
3455     return gen_highpart (mode, op);
3456
3457   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3458
3459   wider_mode = GET_MODE_WIDER_MODE (mode);
3460   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3461                      GET_MODE_BITSIZE (mode), 0, 1);
3462   return convert_modes (mode, wider_mode, op, 0);
3463 }
3464
3465 /* Like expmed_mult_highpart, but only consider using a multiplication
3466    optab.  OP1 is an rtx for the constant operand.  */
3467
3468 static rtx
3469 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3470                             rtx target, int unsignedp, int max_cost)
3471 {
3472   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3473   machine_mode wider_mode;
3474   optab moptab;
3475   rtx tem;
3476   int size;
3477   bool speed = optimize_insn_for_speed_p ();
3478
3479   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3480
3481   wider_mode = GET_MODE_WIDER_MODE (mode);
3482   size = GET_MODE_BITSIZE (mode);
3483
3484   /* Firstly, try using a multiplication insn that only generates the needed
3485      high part of the product, and in the sign flavor of unsignedp.  */
3486   if (mul_highpart_cost (speed, mode) < max_cost)
3487     {
3488       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3489       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3490                           unsignedp, OPTAB_DIRECT);
3491       if (tem)
3492         return tem;
3493     }
3494
3495   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3496      Need to adjust the result after the multiplication.  */
3497   if (size - 1 < BITS_PER_WORD
3498       && (mul_highpart_cost (speed, mode)
3499           + 2 * shift_cost (speed, mode, size-1)
3500           + 4 * add_cost (speed, mode) < max_cost))
3501     {
3502       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3503       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3504                           unsignedp, OPTAB_DIRECT);
3505       if (tem)
3506         /* We used the wrong signedness.  Adjust the result.  */
3507         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3508                                             tem, unsignedp);
3509     }
3510
3511   /* Try widening multiplication.  */
3512   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3513   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3514       && mul_widen_cost (speed, wider_mode) < max_cost)
3515     {
3516       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3517                           unsignedp, OPTAB_WIDEN);
3518       if (tem)
3519         return extract_high_half (mode, tem);
3520     }
3521
3522   /* Try widening the mode and perform a non-widening multiplication.  */
3523   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3524       && size - 1 < BITS_PER_WORD
3525       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3526           < max_cost))
3527     {
3528       rtx_insn *insns;
3529       rtx wop0, wop1;
3530
3531       /* We need to widen the operands, for example to ensure the
3532          constant multiplier is correctly sign or zero extended.
3533          Use a sequence to clean-up any instructions emitted by
3534          the conversions if things don't work out.  */
3535       start_sequence ();
3536       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3537       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3538       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3539                           unsignedp, OPTAB_WIDEN);
3540       insns = get_insns ();
3541       end_sequence ();
3542
3543       if (tem)
3544         {
3545           emit_insn (insns);
3546           return extract_high_half (mode, tem);
3547         }
3548     }
3549
3550   /* Try widening multiplication of opposite signedness, and adjust.  */
3551   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3552   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3553       && size - 1 < BITS_PER_WORD
3554       && (mul_widen_cost (speed, wider_mode)
3555           + 2 * shift_cost (speed, mode, size-1)
3556           + 4 * add_cost (speed, mode) < max_cost))
3557     {
3558       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3559                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3560       if (tem != 0)
3561         {
3562           tem = extract_high_half (mode, tem);
3563           /* We used the wrong signedness.  Adjust the result.  */
3564           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3565                                               target, unsignedp);
3566         }
3567     }
3568
3569   return 0;
3570 }
3571
3572 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3573    putting the high half of the result in TARGET if that is convenient,
3574    and return where the result is.  If the operation can not be performed,
3575    0 is returned.
3576
3577    MODE is the mode of operation and result.
3578
3579    UNSIGNEDP nonzero means unsigned multiply.
3580
3581    MAX_COST is the total allowed cost for the expanded RTL.  */
3582
3583 static rtx
3584 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3585                       rtx target, int unsignedp, int max_cost)
3586 {
3587   machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3588   unsigned HOST_WIDE_INT cnst1;
3589   int extra_cost;
3590   bool sign_adjust = false;
3591   enum mult_variant variant;
3592   struct algorithm alg;
3593   rtx tem;
3594   bool speed = optimize_insn_for_speed_p ();
3595
3596   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3597   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3598   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3599
3600   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3601
3602   /* We can't optimize modes wider than BITS_PER_WORD.
3603      ??? We might be able to perform double-word arithmetic if
3604      mode == word_mode, however all the cost calculations in
3605      synth_mult etc. assume single-word operations.  */
3606   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3607     return expmed_mult_highpart_optab (mode, op0, op1, target,
3608                                        unsignedp, max_cost);
3609
3610   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3611
3612   /* Check whether we try to multiply by a negative constant.  */
3613   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3614     {
3615       sign_adjust = true;
3616       extra_cost += add_cost (speed, mode);
3617     }
3618
3619   /* See whether shift/add multiplication is cheap enough.  */
3620   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3621                            max_cost - extra_cost))
3622     {
3623       /* See whether the specialized multiplication optabs are
3624          cheaper than the shift/add version.  */
3625       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3626                                         alg.cost.cost + extra_cost);
3627       if (tem)
3628         return tem;
3629
3630       tem = convert_to_mode (wider_mode, op0, unsignedp);
3631       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3632       tem = extract_high_half (mode, tem);
3633
3634       /* Adjust result for signedness.  */
3635       if (sign_adjust)
3636         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3637
3638       return tem;
3639     }
3640   return expmed_mult_highpart_optab (mode, op0, op1, target,
3641                                      unsignedp, max_cost);
3642 }
3643
3644
3645 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3646
3647 static rtx
3648 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3649 {
3650   rtx result, temp, shift;
3651   rtx_code_label *label;
3652   int logd;
3653   int prec = GET_MODE_PRECISION (mode);
3654
3655   logd = floor_log2 (d);
3656   result = gen_reg_rtx (mode);
3657
3658   /* Avoid conditional branches when they're expensive.  */
3659   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3660       && optimize_insn_for_speed_p ())
3661     {
3662       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3663                                       mode, 0, -1);
3664       if (signmask)
3665         {
3666           HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3667           signmask = force_reg (mode, signmask);
3668           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3669
3670           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3671              which instruction sequence to use.  If logical right shifts
3672              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3673              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3674
3675           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3676           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3677               || (set_src_cost (temp, optimize_insn_for_speed_p ())
3678                   > COSTS_N_INSNS (2)))
3679             {
3680               temp = expand_binop (mode, xor_optab, op0, signmask,
3681                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3682               temp = expand_binop (mode, sub_optab, temp, signmask,
3683                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3684               temp = expand_binop (mode, and_optab, temp,
3685                                    gen_int_mode (masklow, mode),
3686                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3687               temp = expand_binop (mode, xor_optab, temp, signmask,
3688                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3689               temp = expand_binop (mode, sub_optab, temp, signmask,
3690                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3691             }
3692           else
3693             {
3694               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3695                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3696               signmask = force_reg (mode, signmask);
3697
3698               temp = expand_binop (mode, add_optab, op0, signmask,
3699                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3700               temp = expand_binop (mode, and_optab, temp,
3701                                    gen_int_mode (masklow, mode),
3702                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3703               temp = expand_binop (mode, sub_optab, temp, signmask,
3704                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3705             }
3706           return temp;
3707         }
3708     }
3709
3710   /* Mask contains the mode's signbit and the significant bits of the
3711      modulus.  By including the signbit in the operation, many targets
3712      can avoid an explicit compare operation in the following comparison
3713      against zero.  */
3714   wide_int mask = wi::mask (logd, false, prec);
3715   mask = wi::set_bit (mask, prec - 1);
3716
3717   temp = expand_binop (mode, and_optab, op0,
3718                        immed_wide_int_const (mask, mode),
3719                        result, 1, OPTAB_LIB_WIDEN);
3720   if (temp != result)
3721     emit_move_insn (result, temp);
3722
3723   label = gen_label_rtx ();
3724   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3725
3726   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3727                        0, OPTAB_LIB_WIDEN);
3728
3729   mask = wi::mask (logd, true, prec);
3730   temp = expand_binop (mode, ior_optab, temp,
3731                        immed_wide_int_const (mask, mode),
3732                        result, 1, OPTAB_LIB_WIDEN);
3733   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3734                        0, OPTAB_LIB_WIDEN);
3735   if (temp != result)
3736     emit_move_insn (result, temp);
3737   emit_label (label);
3738   return result;
3739 }
3740
3741 /* Expand signed division of OP0 by a power of two D in mode MODE.
3742    This routine is only called for positive values of D.  */
3743
3744 static rtx
3745 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3746 {
3747   rtx temp;
3748   rtx_code_label *label;
3749   int logd;
3750
3751   logd = floor_log2 (d);
3752
3753   if (d == 2
3754       && BRANCH_COST (optimize_insn_for_speed_p (),
3755                       false) >= 1)
3756     {
3757       temp = gen_reg_rtx (mode);
3758       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3759       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3760                            0, OPTAB_LIB_WIDEN);
3761       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3762     }
3763
3764 #ifdef HAVE_conditional_move
3765   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3766       >= 2)
3767     {
3768       rtx temp2;
3769
3770       start_sequence ();
3771       temp2 = copy_to_mode_reg (mode, op0);
3772       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3773                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3774       temp = force_reg (mode, temp);
3775
3776       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3777       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3778                                      mode, temp, temp2, mode, 0);
3779       if (temp2)
3780         {
3781           rtx_insn *seq = get_insns ();
3782           end_sequence ();
3783           emit_insn (seq);
3784           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3785         }
3786       end_sequence ();
3787     }
3788 #endif
3789
3790   if (BRANCH_COST (optimize_insn_for_speed_p (),
3791                    false) >= 2)
3792     {
3793       int ushift = GET_MODE_BITSIZE (mode) - logd;
3794
3795       temp = gen_reg_rtx (mode);
3796       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3797       if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3798           || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3799              > COSTS_N_INSNS (1))
3800         temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3801                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3802       else
3803         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3804                              ushift, NULL_RTX, 1);
3805       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3806                            0, OPTAB_LIB_WIDEN);
3807       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3808     }
3809
3810   label = gen_label_rtx ();
3811   temp = copy_to_mode_reg (mode, op0);
3812   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3813   expand_inc (temp, gen_int_mode (d - 1, mode));
3814   emit_label (label);
3815   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3816 }
3817 \f
3818 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3819    if that is convenient, and returning where the result is.
3820    You may request either the quotient or the remainder as the result;
3821    specify REM_FLAG nonzero to get the remainder.
3822
3823    CODE is the expression code for which kind of division this is;
3824    it controls how rounding is done.  MODE is the machine mode to use.
3825    UNSIGNEDP nonzero means do unsigned division.  */
3826
3827 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3828    and then correct it by or'ing in missing high bits
3829    if result of ANDI is nonzero.
3830    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3831    This could optimize to a bfexts instruction.
3832    But C doesn't use these operations, so their optimizations are
3833    left for later.  */
3834 /* ??? For modulo, we don't actually need the highpart of the first product,
3835    the low part will do nicely.  And for small divisors, the second multiply
3836    can also be a low-part only multiply or even be completely left out.
3837    E.g. to calculate the remainder of a division by 3 with a 32 bit
3838    multiply, multiply with 0x55555556 and extract the upper two bits;
3839    the result is exact for inputs up to 0x1fffffff.
3840    The input range can be reduced by using cross-sum rules.
3841    For odd divisors >= 3, the following table gives right shift counts
3842    so that if a number is shifted by an integer multiple of the given
3843    amount, the remainder stays the same:
3844    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3845    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3846    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3847    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3848    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3849
3850    Cross-sum rules for even numbers can be derived by leaving as many bits
3851    to the right alone as the divisor has zeros to the right.
3852    E.g. if x is an unsigned 32 bit number:
3853    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3854    */
3855
3856 rtx
3857 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3858                rtx op0, rtx op1, rtx target, int unsignedp)
3859 {
3860   machine_mode compute_mode;
3861   rtx tquotient;
3862   rtx quotient = 0, remainder = 0;
3863   rtx_insn *last;
3864   int size;
3865   rtx_insn *insn;
3866   optab optab1, optab2;
3867   int op1_is_constant, op1_is_pow2 = 0;
3868   int max_cost, extra_cost;
3869   static HOST_WIDE_INT last_div_const = 0;
3870   bool speed = optimize_insn_for_speed_p ();
3871
3872   op1_is_constant = CONST_INT_P (op1);
3873   if (op1_is_constant)
3874     {
3875       unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3876       if (unsignedp)
3877         ext_op1 &= GET_MODE_MASK (mode);
3878       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3879                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3880     }
3881
3882   /*
3883      This is the structure of expand_divmod:
3884
3885      First comes code to fix up the operands so we can perform the operations
3886      correctly and efficiently.
3887
3888      Second comes a switch statement with code specific for each rounding mode.
3889      For some special operands this code emits all RTL for the desired
3890      operation, for other cases, it generates only a quotient and stores it in
3891      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3892      to indicate that it has not done anything.
3893
3894      Last comes code that finishes the operation.  If QUOTIENT is set and
3895      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3896      QUOTIENT is not set, it is computed using trunc rounding.
3897
3898      We try to generate special code for division and remainder when OP1 is a
3899      constant.  If |OP1| = 2**n we can use shifts and some other fast
3900      operations.  For other values of OP1, we compute a carefully selected
3901      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3902      by m.
3903
3904      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3905      half of the product.  Different strategies for generating the product are
3906      implemented in expmed_mult_highpart.
3907
3908      If what we actually want is the remainder, we generate that by another
3909      by-constant multiplication and a subtraction.  */
3910
3911   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3912      code below will malfunction if we are, so check here and handle
3913      the special case if so.  */
3914   if (op1 == const1_rtx)
3915     return rem_flag ? const0_rtx : op0;
3916
3917     /* When dividing by -1, we could get an overflow.
3918      negv_optab can handle overflows.  */
3919   if (! unsignedp && op1 == constm1_rtx)
3920     {
3921       if (rem_flag)
3922         return const0_rtx;
3923       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3924                           ? negv_optab : neg_optab, op0, target, 0);
3925     }
3926
3927   if (target
3928       /* Don't use the function value register as a target
3929          since we have to read it as well as write it,
3930          and function-inlining gets confused by this.  */
3931       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3932           /* Don't clobber an operand while doing a multi-step calculation.  */
3933           || ((rem_flag || op1_is_constant)
3934               && (reg_mentioned_p (target, op0)
3935                   || (MEM_P (op0) && MEM_P (target))))
3936           || reg_mentioned_p (target, op1)
3937           || (MEM_P (op1) && MEM_P (target))))
3938     target = 0;
3939
3940   /* Get the mode in which to perform this computation.  Normally it will
3941      be MODE, but sometimes we can't do the desired operation in MODE.
3942      If so, pick a wider mode in which we can do the operation.  Convert
3943      to that mode at the start to avoid repeated conversions.
3944
3945      First see what operations we need.  These depend on the expression
3946      we are evaluating.  (We assume that divxx3 insns exist under the
3947      same conditions that modxx3 insns and that these insns don't normally
3948      fail.  If these assumptions are not correct, we may generate less
3949      efficient code in some cases.)
3950
3951      Then see if we find a mode in which we can open-code that operation
3952      (either a division, modulus, or shift).  Finally, check for the smallest
3953      mode for which we can do the operation with a library call.  */
3954
3955   /* We might want to refine this now that we have division-by-constant
3956      optimization.  Since expmed_mult_highpart tries so many variants, it is
3957      not straightforward to generalize this.  Maybe we should make an array
3958      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3959
3960   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3961             ? (unsignedp ? lshr_optab : ashr_optab)
3962             : (unsignedp ? udiv_optab : sdiv_optab));
3963   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3964             ? optab1
3965             : (unsignedp ? udivmod_optab : sdivmod_optab));
3966
3967   for (compute_mode = mode; compute_mode != VOIDmode;
3968        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3969     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3970         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3971       break;
3972
3973   if (compute_mode == VOIDmode)
3974     for (compute_mode = mode; compute_mode != VOIDmode;
3975          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3976       if (optab_libfunc (optab1, compute_mode)
3977           || optab_libfunc (optab2, compute_mode))
3978         break;
3979
3980   /* If we still couldn't find a mode, use MODE, but expand_binop will
3981      probably die.  */
3982   if (compute_mode == VOIDmode)
3983     compute_mode = mode;
3984
3985   if (target && GET_MODE (target) == compute_mode)
3986     tquotient = target;
3987   else
3988     tquotient = gen_reg_rtx (compute_mode);
3989
3990   size = GET_MODE_BITSIZE (compute_mode);
3991 #if 0
3992   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3993      (mode), and thereby get better code when OP1 is a constant.  Do that
3994      later.  It will require going over all usages of SIZE below.  */
3995   size = GET_MODE_BITSIZE (mode);
3996 #endif
3997
3998   /* Only deduct something for a REM if the last divide done was
3999      for a different constant.   Then set the constant of the last
4000      divide.  */
4001   max_cost = (unsignedp
4002               ? udiv_cost (speed, compute_mode)
4003               : sdiv_cost (speed, compute_mode));
4004   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4005                      && INTVAL (op1) == last_div_const))
4006     max_cost -= (mul_cost (speed, compute_mode)
4007                  + add_cost (speed, compute_mode));
4008
4009   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4010
4011   /* Now convert to the best mode to use.  */
4012   if (compute_mode != mode)
4013     {
4014       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4015       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4016
4017       /* convert_modes may have placed op1 into a register, so we
4018          must recompute the following.  */
4019       op1_is_constant = CONST_INT_P (op1);
4020       op1_is_pow2 = (op1_is_constant
4021                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4022                           || (! unsignedp
4023                               && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4024     }
4025
4026   /* If one of the operands is a volatile MEM, copy it into a register.  */
4027
4028   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4029     op0 = force_reg (compute_mode, op0);
4030   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4031     op1 = force_reg (compute_mode, op1);
4032
4033   /* If we need the remainder or if OP1 is constant, we need to
4034      put OP0 in a register in case it has any queued subexpressions.  */
4035   if (rem_flag || op1_is_constant)
4036     op0 = force_reg (compute_mode, op0);
4037
4038   last = get_last_insn ();
4039
4040   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4041   if (unsignedp)
4042     {
4043       if (code == FLOOR_DIV_EXPR)
4044         code = TRUNC_DIV_EXPR;
4045       if (code == FLOOR_MOD_EXPR)
4046         code = TRUNC_MOD_EXPR;
4047       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4048         code = TRUNC_DIV_EXPR;
4049     }
4050
4051   if (op1 != const0_rtx)
4052     switch (code)
4053       {
4054       case TRUNC_MOD_EXPR:
4055       case TRUNC_DIV_EXPR:
4056         if (op1_is_constant)
4057           {
4058             if (unsignedp)
4059               {
4060                 unsigned HOST_WIDE_INT mh, ml;
4061                 int pre_shift, post_shift;
4062                 int dummy;
4063                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4064                                             & GET_MODE_MASK (compute_mode));
4065
4066                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4067                   {
4068                     pre_shift = floor_log2 (d);
4069                     if (rem_flag)
4070                       {
4071                         unsigned HOST_WIDE_INT mask
4072                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4073                         remainder
4074                           = expand_binop (compute_mode, and_optab, op0,
4075                                           gen_int_mode (mask, compute_mode),
4076                                           remainder, 1,
4077                                           OPTAB_LIB_WIDEN);
4078                         if (remainder)
4079                           return gen_lowpart (mode, remainder);
4080                       }
4081                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4082                                              pre_shift, tquotient, 1);
4083                   }
4084                 else if (size <= HOST_BITS_PER_WIDE_INT)
4085                   {
4086                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4087                       {
4088                         /* Most significant bit of divisor is set; emit an scc
4089                            insn.  */
4090                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4091                                                           compute_mode, 1, 1);
4092                       }
4093                     else
4094                       {
4095                         /* Find a suitable multiplier and right shift count
4096                            instead of multiplying with D.  */
4097
4098                         mh = choose_multiplier (d, size, size,
4099                                                 &ml, &post_shift, &dummy);
4100
4101                         /* If the suggested multiplier is more than SIZE bits,
4102                            we can do better for even divisors, using an
4103                            initial right shift.  */
4104                         if (mh != 0 && (d & 1) == 0)
4105                           {
4106                             pre_shift = floor_log2 (d & -d);
4107                             mh = choose_multiplier (d >> pre_shift, size,
4108                                                     size - pre_shift,
4109                                                     &ml, &post_shift, &dummy);
4110                             gcc_assert (!mh);
4111                           }
4112                         else
4113                           pre_shift = 0;
4114
4115                         if (mh != 0)
4116                           {
4117                             rtx t1, t2, t3, t4;
4118
4119                             if (post_shift - 1 >= BITS_PER_WORD)
4120                               goto fail1;
4121
4122                             extra_cost
4123                               = (shift_cost (speed, compute_mode, post_shift - 1)
4124                                  + shift_cost (speed, compute_mode, 1)
4125                                  + 2 * add_cost (speed, compute_mode));
4126                             t1 = expmed_mult_highpart
4127                               (compute_mode, op0,
4128                                gen_int_mode (ml, compute_mode),
4129                                NULL_RTX, 1, max_cost - extra_cost);
4130                             if (t1 == 0)
4131                               goto fail1;
4132                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4133                                                                op0, t1),
4134                                                 NULL_RTX);
4135                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4136                                                t2, 1, NULL_RTX, 1);
4137                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4138                                                               t1, t3),
4139                                                 NULL_RTX);
4140                             quotient = expand_shift
4141                               (RSHIFT_EXPR, compute_mode, t4,
4142                                post_shift - 1, tquotient, 1);
4143                           }
4144                         else
4145                           {
4146                             rtx t1, t2;
4147
4148                             if (pre_shift >= BITS_PER_WORD
4149                                 || post_shift >= BITS_PER_WORD)
4150                               goto fail1;
4151
4152                             t1 = expand_shift
4153                               (RSHIFT_EXPR, compute_mode, op0,
4154                                pre_shift, NULL_RTX, 1);
4155                             extra_cost
4156                               = (shift_cost (speed, compute_mode, pre_shift)
4157                                  + shift_cost (speed, compute_mode, post_shift));
4158                             t2 = expmed_mult_highpart
4159                               (compute_mode, t1,
4160                                gen_int_mode (ml, compute_mode),
4161                                NULL_RTX, 1, max_cost - extra_cost);
4162                             if (t2 == 0)
4163                               goto fail1;
4164                             quotient = expand_shift
4165                               (RSHIFT_EXPR, compute_mode, t2,
4166                                post_shift, tquotient, 1);
4167                           }
4168                       }
4169                   }
4170                 else            /* Too wide mode to use tricky code */
4171                   break;
4172
4173                 insn = get_last_insn ();
4174                 if (insn != last)
4175                   set_dst_reg_note (insn, REG_EQUAL,
4176                                     gen_rtx_UDIV (compute_mode, op0, op1),
4177                                     quotient);
4178               }
4179             else                /* TRUNC_DIV, signed */
4180               {
4181                 unsigned HOST_WIDE_INT ml;
4182                 int lgup, post_shift;
4183                 rtx mlr;
4184                 HOST_WIDE_INT d = INTVAL (op1);
4185                 unsigned HOST_WIDE_INT abs_d;
4186
4187                 /* Since d might be INT_MIN, we have to cast to
4188                    unsigned HOST_WIDE_INT before negating to avoid
4189                    undefined signed overflow.  */
4190                 abs_d = (d >= 0
4191                          ? (unsigned HOST_WIDE_INT) d
4192                          : - (unsigned HOST_WIDE_INT) d);
4193
4194                 /* n rem d = n rem -d */
4195                 if (rem_flag && d < 0)
4196                   {
4197                     d = abs_d;
4198                     op1 = gen_int_mode (abs_d, compute_mode);
4199                   }
4200
4201                 if (d == 1)
4202                   quotient = op0;
4203                 else if (d == -1)
4204                   quotient = expand_unop (compute_mode, neg_optab, op0,
4205                                           tquotient, 0);
4206                 else if (HOST_BITS_PER_WIDE_INT >= size
4207                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4208                   {
4209                     /* This case is not handled correctly below.  */
4210                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4211                                                 compute_mode, 1, 1);
4212                     if (quotient == 0)
4213                       goto fail1;
4214                   }
4215                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4216                          && (rem_flag
4217                              ? smod_pow2_cheap (speed, compute_mode)
4218                              : sdiv_pow2_cheap (speed, compute_mode))
4219                          /* We assume that cheap metric is true if the
4220                             optab has an expander for this mode.  */
4221                          && ((optab_handler ((rem_flag ? smod_optab
4222                                               : sdiv_optab),
4223                                              compute_mode)
4224                               != CODE_FOR_nothing)
4225                              || (optab_handler (sdivmod_optab,
4226                                                 compute_mode)
4227                                  != CODE_FOR_nothing)))
4228                   ;
4229                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4230                   {
4231                     if (rem_flag)
4232                       {
4233                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4234                         if (remainder)
4235                           return gen_lowpart (mode, remainder);
4236                       }
4237
4238                     if (sdiv_pow2_cheap (speed, compute_mode)
4239                         && ((optab_handler (sdiv_optab, compute_mode)
4240                              != CODE_FOR_nothing)
4241                             || (optab_handler (sdivmod_optab, compute_mode)
4242                                 != CODE_FOR_nothing)))
4243                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4244                                                 compute_mode, op0,
4245                                                 gen_int_mode (abs_d,
4246                                                               compute_mode),
4247                                                 NULL_RTX, 0);
4248                     else
4249                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4250
4251                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4252                        negate the quotient.  */
4253                     if (d < 0)
4254                       {
4255                         insn = get_last_insn ();
4256                         if (insn != last
4257                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4258                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4259                           set_dst_reg_note (insn, REG_EQUAL,
4260                                             gen_rtx_DIV (compute_mode, op0,
4261                                                          gen_int_mode
4262                                                            (abs_d,
4263                                                             compute_mode)),
4264                                             quotient);
4265
4266                         quotient = expand_unop (compute_mode, neg_optab,
4267                                                 quotient, quotient, 0);
4268                       }
4269                   }
4270                 else if (size <= HOST_BITS_PER_WIDE_INT)
4271                   {
4272                     choose_multiplier (abs_d, size, size - 1,
4273                                        &ml, &post_shift, &lgup);
4274                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4275                       {
4276                         rtx t1, t2, t3;
4277
4278                         if (post_shift >= BITS_PER_WORD
4279                             || size - 1 >= BITS_PER_WORD)
4280                           goto fail1;
4281
4282                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4283                                       + shift_cost (speed, compute_mode, size - 1)
4284                                       + add_cost (speed, compute_mode));
4285                         t1 = expmed_mult_highpart
4286                           (compute_mode, op0, gen_int_mode (ml, compute_mode),
4287                            NULL_RTX, 0, max_cost - extra_cost);
4288                         if (t1 == 0)
4289                           goto fail1;
4290                         t2 = expand_shift
4291                           (RSHIFT_EXPR, compute_mode, t1,
4292                            post_shift, NULL_RTX, 0);
4293                         t3 = expand_shift
4294                           (RSHIFT_EXPR, compute_mode, op0,
4295                            size - 1, NULL_RTX, 0);
4296                         if (d < 0)
4297                           quotient
4298                             = force_operand (gen_rtx_MINUS (compute_mode,
4299                                                             t3, t2),
4300                                              tquotient);
4301                         else
4302                           quotient
4303                             = force_operand (gen_rtx_MINUS (compute_mode,
4304                                                             t2, t3),
4305                                              tquotient);
4306                       }
4307                     else
4308                       {
4309                         rtx t1, t2, t3, t4;
4310
4311                         if (post_shift >= BITS_PER_WORD
4312                             || size - 1 >= BITS_PER_WORD)
4313                           goto fail1;
4314
4315                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4316                         mlr = gen_int_mode (ml, compute_mode);
4317                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4318                                       + shift_cost (speed, compute_mode, size - 1)
4319                                       + 2 * add_cost (speed, compute_mode));
4320                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4321                                                    NULL_RTX, 0,
4322                                                    max_cost - extra_cost);
4323                         if (t1 == 0)
4324                           goto fail1;
4325                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4326                                                           t1, op0),
4327                                             NULL_RTX);
4328                         t3 = expand_shift
4329                           (RSHIFT_EXPR, compute_mode, t2,
4330                            post_shift, NULL_RTX, 0);
4331                         t4 = expand_shift
4332                           (RSHIFT_EXPR, compute_mode, op0,
4333                            size - 1, NULL_RTX, 0);
4334                         if (d < 0)
4335                           quotient
4336                             = force_operand (gen_rtx_MINUS (compute_mode,
4337                                                             t4, t3),
4338                                              tquotient);
4339                         else
4340                           quotient
4341                             = force_operand (gen_rtx_MINUS (compute_mode,
4342                                                             t3, t4),
4343                                              tquotient);
4344                       }
4345                   }
4346                 else            /* Too wide mode to use tricky code */
4347                   break;
4348
4349                 insn = get_last_insn ();
4350                 if (insn != last)
4351                   set_dst_reg_note (insn, REG_EQUAL,
4352                                     gen_rtx_DIV (compute_mode, op0, op1),
4353                                     quotient);
4354               }
4355             break;
4356           }
4357       fail1:
4358         delete_insns_since (last);
4359         break;
4360
4361       case FLOOR_DIV_EXPR:
4362       case FLOOR_MOD_EXPR:
4363       /* We will come here only for signed operations.  */
4364         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4365           {
4366             unsigned HOST_WIDE_INT mh, ml;
4367             int pre_shift, lgup, post_shift;
4368             HOST_WIDE_INT d = INTVAL (op1);
4369
4370             if (d > 0)
4371               {
4372                 /* We could just as easily deal with negative constants here,
4373                    but it does not seem worth the trouble for GCC 2.6.  */
4374                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4375                   {
4376                     pre_shift = floor_log2 (d);
4377                     if (rem_flag)
4378                       {
4379                         unsigned HOST_WIDE_INT mask
4380                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4381                         remainder = expand_binop
4382                           (compute_mode, and_optab, op0,
4383                            gen_int_mode (mask, compute_mode),
4384                            remainder, 0, OPTAB_LIB_WIDEN);
4385                         if (remainder)
4386                           return gen_lowpart (mode, remainder);
4387                       }
4388                     quotient = expand_shift
4389                       (RSHIFT_EXPR, compute_mode, op0,
4390                        pre_shift, tquotient, 0);
4391                   }
4392                 else
4393                   {
4394                     rtx t1, t2, t3, t4;
4395
4396                     mh = choose_multiplier (d, size, size - 1,
4397                                             &ml, &post_shift, &lgup);
4398                     gcc_assert (!mh);
4399
4400                     if (post_shift < BITS_PER_WORD
4401                         && size - 1 < BITS_PER_WORD)
4402                       {
4403                         t1 = expand_shift
4404                           (RSHIFT_EXPR, compute_mode, op0,
4405                            size - 1, NULL_RTX, 0);
4406                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4407                                            NULL_RTX, 0, OPTAB_WIDEN);
4408                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4409                                       + shift_cost (speed, compute_mode, size - 1)
4410                                       + 2 * add_cost (speed, compute_mode));
4411                         t3 = expmed_mult_highpart
4412                           (compute_mode, t2, gen_int_mode (ml, compute_mode),
4413                            NULL_RTX, 1, max_cost - extra_cost);
4414                         if (t3 != 0)
4415                           {
4416                             t4 = expand_shift
4417                               (RSHIFT_EXPR, compute_mode, t3,
4418                                post_shift, NULL_RTX, 1);
4419                             quotient = expand_binop (compute_mode, xor_optab,
4420                                                      t4, t1, tquotient, 0,
4421                                                      OPTAB_WIDEN);
4422                           }
4423                       }
4424                   }
4425               }
4426             else
4427               {
4428                 rtx nsign, t1, t2, t3, t4;
4429                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4430                                                   op0, constm1_rtx), NULL_RTX);
4431                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4432                                    0, OPTAB_WIDEN);
4433                 nsign = expand_shift
4434                   (RSHIFT_EXPR, compute_mode, t2,
4435                    size - 1, NULL_RTX, 0);
4436                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4437                                     NULL_RTX);
4438                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4439                                     NULL_RTX, 0);
4440                 if (t4)
4441                   {
4442                     rtx t5;
4443                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4444                                       NULL_RTX, 0);
4445                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4446                                                             t4, t5),
4447                                               tquotient);
4448                   }
4449               }
4450           }
4451
4452         if (quotient != 0)
4453           break;
4454         delete_insns_since (last);
4455
4456         /* Try using an instruction that produces both the quotient and
4457            remainder, using truncation.  We can easily compensate the quotient
4458            or remainder to get floor rounding, once we have the remainder.
4459            Notice that we compute also the final remainder value here,
4460            and return the result right away.  */
4461         if (target == 0 || GET_MODE (target) != compute_mode)
4462           target = gen_reg_rtx (compute_mode);
4463
4464         if (rem_flag)
4465           {
4466             remainder
4467               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4468             quotient = gen_reg_rtx (compute_mode);
4469           }
4470         else
4471           {
4472             quotient
4473               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4474             remainder = gen_reg_rtx (compute_mode);
4475           }
4476
4477         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4478                                  quotient, remainder, 0))
4479           {
4480             /* This could be computed with a branch-less sequence.
4481                Save that for later.  */
4482             rtx tem;
4483             rtx_code_label *label = gen_label_rtx ();
4484             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4485             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4486                                 NULL_RTX, 0, OPTAB_WIDEN);
4487             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4488             expand_dec (quotient, const1_rtx);
4489             expand_inc (remainder, op1);
4490             emit_label (label);
4491             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4492           }
4493
4494         /* No luck with division elimination or divmod.  Have to do it
4495            by conditionally adjusting op0 *and* the result.  */
4496         {
4497           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4498           rtx adjusted_op0;
4499           rtx tem;
4500
4501           quotient = gen_reg_rtx (compute_mode);
4502           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4503           label1 = gen_label_rtx ();
4504           label2 = gen_label_rtx ();
4505           label3 = gen_label_rtx ();
4506           label4 = gen_label_rtx ();
4507           label5 = gen_label_rtx ();
4508           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4509           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4510           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4511                               quotient, 0, OPTAB_LIB_WIDEN);
4512           if (tem != quotient)
4513             emit_move_insn (quotient, tem);
4514           emit_jump_insn (gen_jump (label5));
4515           emit_barrier ();
4516           emit_label (label1);
4517           expand_inc (adjusted_op0, const1_rtx);
4518           emit_jump_insn (gen_jump (label4));
4519           emit_barrier ();
4520           emit_label (label2);
4521           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4522           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4523                               quotient, 0, OPTAB_LIB_WIDEN);
4524           if (tem != quotient)
4525             emit_move_insn (quotient, tem);
4526           emit_jump_insn (gen_jump (label5));
4527           emit_barrier ();
4528           emit_label (label3);
4529           expand_dec (adjusted_op0, const1_rtx);
4530           emit_label (label4);
4531           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4532                               quotient, 0, OPTAB_LIB_WIDEN);
4533           if (tem != quotient)
4534             emit_move_insn (quotient, tem);
4535           expand_dec (quotient, const1_rtx);
4536           emit_label (label5);
4537         }
4538         break;
4539
4540       case CEIL_DIV_EXPR:
4541       case CEIL_MOD_EXPR:
4542         if (unsignedp)
4543           {
4544             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4545               {
4546                 rtx t1, t2, t3;
4547                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4548                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4549                                    floor_log2 (d), tquotient, 1);
4550                 t2 = expand_binop (compute_mode, and_optab, op0,
4551                                    gen_int_mode (d - 1, compute_mode),
4552                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4553                 t3 = gen_reg_rtx (compute_mode);
4554                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4555                                       compute_mode, 1, 1);
4556                 if (t3 == 0)
4557                   {
4558                     rtx_code_label *lab;
4559                     lab = gen_label_rtx ();
4560                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4561                     expand_inc (t1, const1_rtx);
4562                     emit_label (lab);
4563                     quotient = t1;
4564                   }
4565                 else
4566                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4567                                                           t1, t3),
4568                                             tquotient);
4569                 break;
4570               }
4571
4572             /* Try using an instruction that produces both the quotient and
4573                remainder, using truncation.  We can easily compensate the
4574                quotient or remainder to get ceiling rounding, once we have the
4575                remainder.  Notice that we compute also the final remainder
4576                value here, and return the result right away.  */
4577             if (target == 0 || GET_MODE (target) != compute_mode)
4578               target = gen_reg_rtx (compute_mode);
4579
4580             if (rem_flag)
4581               {
4582                 remainder = (REG_P (target)
4583                              ? target : gen_reg_rtx (compute_mode));
4584                 quotient = gen_reg_rtx (compute_mode);
4585               }
4586             else
4587               {
4588                 quotient = (REG_P (target)
4589                             ? target : gen_reg_rtx (compute_mode));
4590                 remainder = gen_reg_rtx (compute_mode);
4591               }
4592
4593             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4594                                      remainder, 1))
4595               {
4596                 /* This could be computed with a branch-less sequence.
4597                    Save that for later.  */
4598                 rtx_code_label *label = gen_label_rtx ();
4599                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4600                                  compute_mode, label);
4601                 expand_inc (quotient, const1_rtx);
4602                 expand_dec (remainder, op1);
4603                 emit_label (label);
4604                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4605               }
4606
4607             /* No luck with division elimination or divmod.  Have to do it
4608                by conditionally adjusting op0 *and* the result.  */
4609             {
4610               rtx_code_label *label1, *label2;
4611               rtx adjusted_op0, tem;
4612
4613               quotient = gen_reg_rtx (compute_mode);
4614               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4615               label1 = gen_label_rtx ();
4616               label2 = gen_label_rtx ();
4617               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4618                                compute_mode, label1);
4619               emit_move_insn  (quotient, const0_rtx);
4620               emit_jump_insn (gen_jump (label2));
4621               emit_barrier ();
4622               emit_label (label1);
4623               expand_dec (adjusted_op0, const1_rtx);
4624               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4625                                   quotient, 1, OPTAB_LIB_WIDEN);
4626               if (tem != quotient)
4627                 emit_move_insn (quotient, tem);
4628               expand_inc (quotient, const1_rtx);
4629               emit_label (label2);
4630             }
4631           }
4632         else /* signed */
4633           {
4634             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4635                 && INTVAL (op1) >= 0)
4636               {
4637                 /* This is extremely similar to the code for the unsigned case
4638                    above.  For 2.7 we should merge these variants, but for
4639                    2.6.1 I don't want to touch the code for unsigned since that
4640                    get used in C.  The signed case will only be used by other
4641                    languages (Ada).  */
4642
4643                 rtx t1, t2, t3;
4644                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4645                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4646                                    floor_log2 (d), tquotient, 0);
4647                 t2 = expand_binop (compute_mode, and_optab, op0,
4648                                    gen_int_mode (d - 1, compute_mode),
4649                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4650                 t3 = gen_reg_rtx (compute_mode);
4651                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4652                                       compute_mode, 1, 1);
4653                 if (t3 == 0)
4654                   {
4655                     rtx_code_label *lab;
4656                     lab = gen_label_rtx ();
4657                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4658                     expand_inc (t1, const1_rtx);
4659                     emit_label (lab);
4660                     quotient = t1;
4661                   }
4662                 else
4663                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4664                                                           t1, t3),
4665                                             tquotient);
4666                 break;
4667               }
4668
4669             /* Try using an instruction that produces both the quotient and
4670                remainder, using truncation.  We can easily compensate the
4671                quotient or remainder to get ceiling rounding, once we have the
4672                remainder.  Notice that we compute also the final remainder
4673                value here, and return the result right away.  */
4674             if (target == 0 || GET_MODE (target) != compute_mode)
4675               target = gen_reg_rtx (compute_mode);
4676             if (rem_flag)
4677               {
4678                 remainder= (REG_P (target)
4679                             ? target : gen_reg_rtx (compute_mode));
4680                 quotient = gen_reg_rtx (compute_mode);
4681               }
4682             else
4683               {
4684                 quotient = (REG_P (target)
4685                             ? target : gen_reg_rtx (compute_mode));
4686                 remainder = gen_reg_rtx (compute_mode);
4687               }
4688
4689             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4690                                      remainder, 0))
4691               {
4692                 /* This could be computed with a branch-less sequence.
4693                    Save that for later.  */
4694                 rtx tem;
4695                 rtx_code_label *label = gen_label_rtx ();
4696                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4697                                  compute_mode, label);
4698                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4699                                     NULL_RTX, 0, OPTAB_WIDEN);
4700                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4701                 expand_inc (quotient, const1_rtx);
4702                 expand_dec (remainder, op1);
4703                 emit_label (label);
4704                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4705               }
4706
4707             /* No luck with division elimination or divmod.  Have to do it
4708                by conditionally adjusting op0 *and* the result.  */
4709             {
4710               rtx_code_label *label1, *label2, *label3, *label4, *label5;
4711               rtx adjusted_op0;
4712               rtx tem;
4713
4714               quotient = gen_reg_rtx (compute_mode);
4715               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4716               label1 = gen_label_rtx ();
4717               label2 = gen_label_rtx ();
4718               label3 = gen_label_rtx ();
4719               label4 = gen_label_rtx ();
4720               label5 = gen_label_rtx ();
4721               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4722               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4723                                compute_mode, label1);
4724               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4725                                   quotient, 0, OPTAB_LIB_WIDEN);
4726               if (tem != quotient)
4727                 emit_move_insn (quotient, tem);
4728               emit_jump_insn (gen_jump (label5));
4729               emit_barrier ();
4730               emit_label (label1);
4731               expand_dec (adjusted_op0, const1_rtx);
4732               emit_jump_insn (gen_jump (label4));
4733               emit_barrier ();
4734               emit_label (label2);
4735               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4736                                compute_mode, label3);
4737               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4738                                   quotient, 0, OPTAB_LIB_WIDEN);
4739               if (tem != quotient)
4740                 emit_move_insn (quotient, tem);
4741               emit_jump_insn (gen_jump (label5));
4742               emit_barrier ();
4743               emit_label (label3);
4744               expand_inc (adjusted_op0, const1_rtx);
4745               emit_label (label4);
4746               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4747                                   quotient, 0, OPTAB_LIB_WIDEN);
4748               if (tem != quotient)
4749                 emit_move_insn (quotient, tem);
4750               expand_inc (quotient, const1_rtx);
4751               emit_label (label5);
4752             }
4753           }
4754         break;
4755
4756       case EXACT_DIV_EXPR:
4757         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4758           {
4759             HOST_WIDE_INT d = INTVAL (op1);
4760             unsigned HOST_WIDE_INT ml;
4761             int pre_shift;
4762             rtx t1;
4763
4764             pre_shift = floor_log2 (d & -d);
4765             ml = invert_mod2n (d >> pre_shift, size);
4766             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4767                                pre_shift, NULL_RTX, unsignedp);
4768             quotient = expand_mult (compute_mode, t1,
4769                                     gen_int_mode (ml, compute_mode),
4770                                     NULL_RTX, 1);
4771
4772             insn = get_last_insn ();
4773             set_dst_reg_note (insn, REG_EQUAL,
4774                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4775                                               compute_mode, op0, op1),
4776                               quotient);
4777           }
4778         break;
4779
4780       case ROUND_DIV_EXPR:
4781       case ROUND_MOD_EXPR:
4782         if (unsignedp)
4783           {
4784             rtx tem;
4785             rtx_code_label *label;
4786             label = gen_label_rtx ();
4787             quotient = gen_reg_rtx (compute_mode);
4788             remainder = gen_reg_rtx (compute_mode);
4789             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4790               {
4791                 rtx tem;
4792                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4793                                          quotient, 1, OPTAB_LIB_WIDEN);
4794                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4795                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4796                                           remainder, 1, OPTAB_LIB_WIDEN);
4797               }
4798             tem = plus_constant (compute_mode, op1, -1);
4799             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4800             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4801             expand_inc (quotient, const1_rtx);
4802             expand_dec (remainder, op1);
4803             emit_label (label);
4804           }
4805         else
4806           {
4807             rtx abs_rem, abs_op1, tem, mask;
4808             rtx_code_label *label;
4809             label = gen_label_rtx ();
4810             quotient = gen_reg_rtx (compute_mode);
4811             remainder = gen_reg_rtx (compute_mode);
4812             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4813               {
4814                 rtx tem;
4815                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4816                                          quotient, 0, OPTAB_LIB_WIDEN);
4817                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4818                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4819                                           remainder, 0, OPTAB_LIB_WIDEN);
4820               }
4821             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4822             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4823             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4824                                 1, NULL_RTX, 1);
4825             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4826             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4827                                 NULL_RTX, 0, OPTAB_WIDEN);
4828             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4829                                  size - 1, NULL_RTX, 0);
4830             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4831                                 NULL_RTX, 0, OPTAB_WIDEN);
4832             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4833                                 NULL_RTX, 0, OPTAB_WIDEN);
4834             expand_inc (quotient, tem);
4835             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4836                                 NULL_RTX, 0, OPTAB_WIDEN);
4837             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4838                                 NULL_RTX, 0, OPTAB_WIDEN);
4839             expand_dec (remainder, tem);
4840             emit_label (label);
4841           }
4842         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4843
4844       default:
4845         gcc_unreachable ();
4846       }
4847
4848   if (quotient == 0)
4849     {
4850       if (target && GET_MODE (target) != compute_mode)
4851         target = 0;
4852
4853       if (rem_flag)
4854         {
4855           /* Try to produce the remainder without producing the quotient.
4856              If we seem to have a divmod pattern that does not require widening,
4857              don't try widening here.  We should really have a WIDEN argument
4858              to expand_twoval_binop, since what we'd really like to do here is
4859              1) try a mod insn in compute_mode
4860              2) try a divmod insn in compute_mode
4861              3) try a div insn in compute_mode and multiply-subtract to get
4862                 remainder
4863              4) try the same things with widening allowed.  */
4864           remainder
4865             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4866                                  op0, op1, target,
4867                                  unsignedp,
4868                                  ((optab_handler (optab2, compute_mode)
4869                                    != CODE_FOR_nothing)
4870                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4871           if (remainder == 0)
4872             {
4873               /* No luck there.  Can we do remainder and divide at once
4874                  without a library call?  */
4875               remainder = gen_reg_rtx (compute_mode);
4876               if (! expand_twoval_binop ((unsignedp
4877                                           ? udivmod_optab
4878                                           : sdivmod_optab),
4879                                          op0, op1,
4880                                          NULL_RTX, remainder, unsignedp))
4881                 remainder = 0;
4882             }
4883
4884           if (remainder)
4885             return gen_lowpart (mode, remainder);
4886         }
4887
4888       /* Produce the quotient.  Try a quotient insn, but not a library call.
4889          If we have a divmod in this mode, use it in preference to widening
4890          the div (for this test we assume it will not fail). Note that optab2
4891          is set to the one of the two optabs that the call below will use.  */
4892       quotient
4893         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4894                              op0, op1, rem_flag ? NULL_RTX : target,
4895                              unsignedp,
4896                              ((optab_handler (optab2, compute_mode)
4897                                != CODE_FOR_nothing)
4898                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4899
4900       if (quotient == 0)
4901         {
4902           /* No luck there.  Try a quotient-and-remainder insn,
4903              keeping the quotient alone.  */
4904           quotient = gen_reg_rtx (compute_mode);
4905           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4906                                      op0, op1,
4907                                      quotient, NULL_RTX, unsignedp))
4908             {
4909               quotient = 0;
4910               if (! rem_flag)
4911                 /* Still no luck.  If we are not computing the remainder,
4912                    use a library call for the quotient.  */
4913                 quotient = sign_expand_binop (compute_mode,
4914                                               udiv_optab, sdiv_optab,
4915                                               op0, op1, target,
4916                                               unsignedp, OPTAB_LIB_WIDEN);
4917             }
4918         }
4919     }
4920
4921   if (rem_flag)
4922     {
4923       if (target && GET_MODE (target) != compute_mode)
4924         target = 0;
4925
4926       if (quotient == 0)
4927         {
4928           /* No divide instruction either.  Use library for remainder.  */
4929           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4930                                          op0, op1, target,
4931                                          unsignedp, OPTAB_LIB_WIDEN);
4932           /* No remainder function.  Try a quotient-and-remainder
4933              function, keeping the remainder.  */
4934           if (!remainder)
4935             {
4936               remainder = gen_reg_rtx (compute_mode);
4937               if (!expand_twoval_binop_libfunc
4938                   (unsignedp ? udivmod_optab : sdivmod_optab,
4939                    op0, op1,
4940                    NULL_RTX, remainder,
4941                    unsignedp ? UMOD : MOD))
4942                 remainder = NULL_RTX;
4943             }
4944         }
4945       else
4946         {
4947           /* We divided.  Now finish doing X - Y * (X / Y).  */
4948           remainder = expand_mult (compute_mode, quotient, op1,
4949                                    NULL_RTX, unsignedp);
4950           remainder = expand_binop (compute_mode, sub_optab, op0,
4951                                     remainder, target, unsignedp,
4952                                     OPTAB_LIB_WIDEN);
4953         }
4954     }
4955
4956   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4957 }
4958 \f
4959 /* Return a tree node with data type TYPE, describing the value of X.
4960    Usually this is an VAR_DECL, if there is no obvious better choice.
4961    X may be an expression, however we only support those expressions
4962    generated by loop.c.  */
4963
4964 tree
4965 make_tree (tree type, rtx x)
4966 {
4967   tree t;
4968
4969   switch (GET_CODE (x))
4970     {
4971     case CONST_INT:
4972     case CONST_WIDE_INT:
4973       t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
4974       return t;
4975
4976     case CONST_DOUBLE:
4977       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
4978       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
4979         t = wide_int_to_tree (type,
4980                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
4981                                                     HOST_BITS_PER_WIDE_INT * 2));
4982       else
4983         {
4984           REAL_VALUE_TYPE d;
4985
4986           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4987           t = build_real (type, d);
4988         }
4989
4990       return t;
4991
4992     case CONST_VECTOR:
4993       {
4994         int units = CONST_VECTOR_NUNITS (x);
4995         tree itype = TREE_TYPE (type);
4996         tree *elts;
4997         int i;
4998
4999         /* Build a tree with vector elements.  */
5000         elts = XALLOCAVEC (tree, units);
5001         for (i = units - 1; i >= 0; --i)
5002           {
5003             rtx elt = CONST_VECTOR_ELT (x, i);
5004             elts[i] = make_tree (itype, elt);
5005           }
5006
5007         return build_vector (type, elts);
5008       }
5009
5010     case PLUS:
5011       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5012                           make_tree (type, XEXP (x, 1)));
5013
5014     case MINUS:
5015       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5016                           make_tree (type, XEXP (x, 1)));
5017
5018     case NEG:
5019       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5020
5021     case MULT:
5022       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5023                           make_tree (type, XEXP (x, 1)));
5024
5025     case ASHIFT:
5026       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5027                           make_tree (type, XEXP (x, 1)));
5028
5029     case LSHIFTRT:
5030       t = unsigned_type_for (type);
5031       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5032                                          make_tree (t, XEXP (x, 0)),
5033                                          make_tree (type, XEXP (x, 1))));
5034
5035     case ASHIFTRT:
5036       t = signed_type_for (type);
5037       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5038                                          make_tree (t, XEXP (x, 0)),
5039                                          make_tree (type, XEXP (x, 1))));
5040
5041     case DIV:
5042       if (TREE_CODE (type) != REAL_TYPE)
5043         t = signed_type_for (type);
5044       else
5045         t = type;
5046
5047       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5048                                          make_tree (t, XEXP (x, 0)),
5049                                          make_tree (t, XEXP (x, 1))));
5050     case UDIV:
5051       t = unsigned_type_for (type);
5052       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5053                                          make_tree (t, XEXP (x, 0)),
5054                                          make_tree (t, XEXP (x, 1))));
5055
5056     case SIGN_EXTEND:
5057     case ZERO_EXTEND:
5058       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5059                                           GET_CODE (x) == ZERO_EXTEND);
5060       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5061
5062     case CONST:
5063       return make_tree (type, XEXP (x, 0));
5064
5065     case SYMBOL_REF:
5066       t = SYMBOL_REF_DECL (x);
5067       if (t)
5068         return fold_convert (type, build_fold_addr_expr (t));
5069       /* else fall through.  */
5070
5071     default:
5072       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5073
5074       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5075          address mode to pointer mode.  */
5076       if (POINTER_TYPE_P (type))
5077         x = convert_memory_address_addr_space
5078               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5079
5080       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5081          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5082       t->decl_with_rtl.rtl = x;
5083
5084       return t;
5085     }
5086 }
5087 \f
5088 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5089    and returning TARGET.
5090
5091    If TARGET is 0, a pseudo-register or constant is returned.  */
5092
5093 rtx
5094 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5095 {
5096   rtx tem = 0;
5097
5098   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5099     tem = simplify_binary_operation (AND, mode, op0, op1);
5100   if (tem == 0)
5101     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5102
5103   if (target == 0)
5104     target = tem;
5105   else if (tem != target)
5106     emit_move_insn (target, tem);
5107   return target;
5108 }
5109
5110 /* Helper function for emit_store_flag.  */
5111 rtx
5112 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5113              machine_mode mode, machine_mode compare_mode,
5114              int unsignedp, rtx x, rtx y, int normalizep,
5115              machine_mode target_mode)
5116 {
5117   struct expand_operand ops[4];
5118   rtx op0, comparison, subtarget;
5119   rtx_insn *last;
5120   machine_mode result_mode = targetm.cstore_mode (icode);
5121
5122   last = get_last_insn ();
5123   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5124   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5125   if (!x || !y)
5126     {
5127       delete_insns_since (last);
5128       return NULL_RTX;
5129     }
5130
5131   if (target_mode == VOIDmode)
5132     target_mode = result_mode;
5133   if (!target)
5134     target = gen_reg_rtx (target_mode);
5135
5136   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5137
5138   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5139   create_fixed_operand (&ops[1], comparison);
5140   create_fixed_operand (&ops[2], x);
5141   create_fixed_operand (&ops[3], y);
5142   if (!maybe_expand_insn (icode, 4, ops))
5143     {
5144       delete_insns_since (last);
5145       return NULL_RTX;
5146     }
5147   subtarget = ops[0].value;
5148
5149   /* If we are converting to a wider mode, first convert to
5150      TARGET_MODE, then normalize.  This produces better combining
5151      opportunities on machines that have a SIGN_EXTRACT when we are
5152      testing a single bit.  This mostly benefits the 68k.
5153
5154      If STORE_FLAG_VALUE does not have the sign bit set when
5155      interpreted in MODE, we can do this conversion as unsigned, which
5156      is usually more efficient.  */
5157   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5158     {
5159       convert_move (target, subtarget,
5160                     val_signbit_known_clear_p (result_mode,
5161                                                STORE_FLAG_VALUE));
5162       op0 = target;
5163       result_mode = target_mode;
5164     }
5165   else
5166     op0 = subtarget;
5167
5168   /* If we want to keep subexpressions around, don't reuse our last
5169      target.  */
5170   if (optimize)
5171     subtarget = 0;
5172
5173   /* Now normalize to the proper value in MODE.  Sometimes we don't
5174      have to do anything.  */
5175   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5176     ;
5177   /* STORE_FLAG_VALUE might be the most negative number, so write
5178      the comparison this way to avoid a compiler-time warning.  */
5179   else if (- normalizep == STORE_FLAG_VALUE)
5180     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5181
5182   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5183      it hard to use a value of just the sign bit due to ANSI integer
5184      constant typing rules.  */
5185   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5186     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5187                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5188                         normalizep == 1);
5189   else
5190     {
5191       gcc_assert (STORE_FLAG_VALUE & 1);
5192
5193       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5194       if (normalizep == -1)
5195         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5196     }
5197
5198   /* If we were converting to a smaller mode, do the conversion now.  */
5199   if (target_mode != result_mode)
5200     {
5201       convert_move (target, op0, 0);
5202       return target;
5203     }
5204   else
5205     return op0;
5206 }
5207
5208
5209 /* A subroutine of emit_store_flag only including "tricks" that do not
5210    need a recursive call.  These are kept separate to avoid infinite
5211    loops.  */
5212
5213 static rtx
5214 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5215                    machine_mode mode, int unsignedp, int normalizep,
5216                    machine_mode target_mode)
5217 {
5218   rtx subtarget;
5219   enum insn_code icode;
5220   machine_mode compare_mode;
5221   enum mode_class mclass;
5222   enum rtx_code scode;
5223   rtx tem;
5224
5225   if (unsignedp)
5226     code = unsigned_condition (code);
5227   scode = swap_condition (code);
5228
5229   /* If one operand is constant, make it the second one.  Only do this
5230      if the other operand is not constant as well.  */
5231
5232   if (swap_commutative_operands_p (op0, op1))
5233     {
5234       tem = op0;
5235       op0 = op1;
5236       op1 = tem;
5237       code = swap_condition (code);
5238     }
5239
5240   if (mode == VOIDmode)
5241     mode = GET_MODE (op0);
5242
5243   /* For some comparisons with 1 and -1, we can convert this to
5244      comparisons with zero.  This will often produce more opportunities for
5245      store-flag insns.  */
5246
5247   switch (code)
5248     {
5249     case LT:
5250       if (op1 == const1_rtx)
5251         op1 = const0_rtx, code = LE;
5252       break;
5253     case LE:
5254       if (op1 == constm1_rtx)
5255         op1 = const0_rtx, code = LT;
5256       break;
5257     case GE:
5258       if (op1 == const1_rtx)
5259         op1 = const0_rtx, code = GT;
5260       break;
5261     case GT:
5262       if (op1 == constm1_rtx)
5263         op1 = const0_rtx, code = GE;
5264       break;
5265     case GEU:
5266       if (op1 == const1_rtx)
5267         op1 = const0_rtx, code = NE;
5268       break;
5269     case LTU:
5270       if (op1 == const1_rtx)
5271         op1 = const0_rtx, code = EQ;
5272       break;
5273     default:
5274       break;
5275     }
5276
5277   /* If we are comparing a double-word integer with zero or -1, we can
5278      convert the comparison into one involving a single word.  */
5279   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5280       && GET_MODE_CLASS (mode) == MODE_INT
5281       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5282     {
5283       if ((code == EQ || code == NE)
5284           && (op1 == const0_rtx || op1 == constm1_rtx))
5285         {
5286           rtx op00, op01;
5287
5288           /* Do a logical OR or AND of the two words and compare the
5289              result.  */
5290           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5291           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5292           tem = expand_binop (word_mode,
5293                               op1 == const0_rtx ? ior_optab : and_optab,
5294                               op00, op01, NULL_RTX, unsignedp,
5295                               OPTAB_DIRECT);
5296
5297           if (tem != 0)
5298             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5299                                    unsignedp, normalizep);
5300         }
5301       else if ((code == LT || code == GE) && op1 == const0_rtx)
5302         {
5303           rtx op0h;
5304
5305           /* If testing the sign bit, can just test on high word.  */
5306           op0h = simplify_gen_subreg (word_mode, op0, mode,
5307                                       subreg_highpart_offset (word_mode,
5308                                                               mode));
5309           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5310                                  unsignedp, normalizep);
5311         }
5312       else
5313         tem = NULL_RTX;
5314
5315       if (tem)
5316         {
5317           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5318             return tem;
5319           if (!target)
5320             target = gen_reg_rtx (target_mode);
5321
5322           convert_move (target, tem,
5323                         !val_signbit_known_set_p (word_mode,
5324                                                   (normalizep ? normalizep
5325                                                    : STORE_FLAG_VALUE)));
5326           return target;
5327         }
5328     }
5329
5330   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5331      complement of A (for GE) and shifting the sign bit to the low bit.  */
5332   if (op1 == const0_rtx && (code == LT || code == GE)
5333       && GET_MODE_CLASS (mode) == MODE_INT
5334       && (normalizep || STORE_FLAG_VALUE == 1
5335           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5336     {
5337       subtarget = target;
5338
5339       if (!target)
5340         target_mode = mode;
5341
5342       /* If the result is to be wider than OP0, it is best to convert it
5343          first.  If it is to be narrower, it is *incorrect* to convert it
5344          first.  */
5345       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5346         {
5347           op0 = convert_modes (target_mode, mode, op0, 0);
5348           mode = target_mode;
5349         }
5350
5351       if (target_mode != mode)
5352         subtarget = 0;
5353
5354       if (code == GE)
5355         op0 = expand_unop (mode, one_cmpl_optab, op0,
5356                            ((STORE_FLAG_VALUE == 1 || normalizep)
5357                             ? 0 : subtarget), 0);
5358
5359       if (STORE_FLAG_VALUE == 1 || normalizep)
5360         /* If we are supposed to produce a 0/1 value, we want to do
5361            a logical shift from the sign bit to the low-order bit; for
5362            a -1/0 value, we do an arithmetic shift.  */
5363         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5364                             GET_MODE_BITSIZE (mode) - 1,
5365                             subtarget, normalizep != -1);
5366
5367       if (mode != target_mode)
5368         op0 = convert_modes (target_mode, mode, op0, 0);
5369
5370       return op0;
5371     }
5372
5373   mclass = GET_MODE_CLASS (mode);
5374   for (compare_mode = mode; compare_mode != VOIDmode;
5375        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5376     {
5377      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5378      icode = optab_handler (cstore_optab, optab_mode);
5379      if (icode != CODE_FOR_nothing)
5380         {
5381           do_pending_stack_adjust ();
5382           tem = emit_cstore (target, icode, code, mode, compare_mode,
5383                              unsignedp, op0, op1, normalizep, target_mode);
5384           if (tem)
5385             return tem;
5386
5387           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5388             {
5389               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5390                                  unsignedp, op1, op0, normalizep, target_mode);
5391               if (tem)
5392                 return tem;
5393             }
5394           break;
5395         }
5396     }
5397
5398   return 0;
5399 }
5400
5401 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5402    and storing in TARGET.  Normally return TARGET.
5403    Return 0 if that cannot be done.
5404
5405    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5406    it is VOIDmode, they cannot both be CONST_INT.
5407
5408    UNSIGNEDP is for the case where we have to widen the operands
5409    to perform the operation.  It says to use zero-extension.
5410
5411    NORMALIZEP is 1 if we should convert the result to be either zero
5412    or one.  Normalize is -1 if we should convert the result to be
5413    either zero or -1.  If NORMALIZEP is zero, the result will be left
5414    "raw" out of the scc insn.  */
5415
5416 rtx
5417 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5418                  machine_mode mode, int unsignedp, int normalizep)
5419 {
5420   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5421   enum rtx_code rcode;
5422   rtx subtarget;
5423   rtx tem, trueval;
5424   rtx_insn *last;
5425
5426   /* If we compare constants, we shouldn't use a store-flag operation,
5427      but a constant load.  We can get there via the vanilla route that
5428      usually generates a compare-branch sequence, but will in this case
5429      fold the comparison to a constant, and thus elide the branch.  */
5430   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5431     return NULL_RTX;
5432
5433   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5434                            target_mode);
5435   if (tem)
5436     return tem;
5437
5438   /* If we reached here, we can't do this with a scc insn, however there
5439      are some comparisons that can be done in other ways.  Don't do any
5440      of these cases if branches are very cheap.  */
5441   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5442     return 0;
5443
5444   /* See what we need to return.  We can only return a 1, -1, or the
5445      sign bit.  */
5446
5447   if (normalizep == 0)
5448     {
5449       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5450         normalizep = STORE_FLAG_VALUE;
5451
5452       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5453         ;
5454       else
5455         return 0;
5456     }
5457
5458   last = get_last_insn ();
5459
5460   /* If optimizing, use different pseudo registers for each insn, instead
5461      of reusing the same pseudo.  This leads to better CSE, but slows
5462      down the compiler, since there are more pseudos */
5463   subtarget = (!optimize
5464                && (target_mode == mode)) ? target : NULL_RTX;
5465   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5466
5467   /* For floating-point comparisons, try the reverse comparison or try
5468      changing the "orderedness" of the comparison.  */
5469   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5470     {
5471       enum rtx_code first_code;
5472       bool and_them;
5473
5474       rcode = reverse_condition_maybe_unordered (code);
5475       if (can_compare_p (rcode, mode, ccp_store_flag)
5476           && (code == ORDERED || code == UNORDERED
5477               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5478               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5479         {
5480           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5481                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5482
5483           /* For the reverse comparison, use either an addition or a XOR.  */
5484           if (want_add
5485               && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5486                            optimize_insn_for_speed_p ()) == 0)
5487             {
5488               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5489                                        STORE_FLAG_VALUE, target_mode);
5490               if (tem)
5491                 return expand_binop (target_mode, add_optab, tem,
5492                                      gen_int_mode (normalizep, target_mode),
5493                                      target, 0, OPTAB_WIDEN);
5494             }
5495           else if (!want_add
5496                    && rtx_cost (trueval, XOR, 1,
5497                                 optimize_insn_for_speed_p ()) == 0)
5498             {
5499               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5500                                        normalizep, target_mode);
5501               if (tem)
5502                 return expand_binop (target_mode, xor_optab, tem, trueval,
5503                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5504             }
5505         }
5506
5507       delete_insns_since (last);
5508
5509       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5510       if (code == ORDERED || code == UNORDERED)
5511         return 0;
5512
5513       and_them = split_comparison (code, mode, &first_code, &code);
5514
5515       /* If there are no NaNs, the first comparison should always fall through.
5516          Effectively change the comparison to the other one.  */
5517       if (!HONOR_NANS (mode))
5518         {
5519           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5520           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5521                                     target_mode);
5522         }
5523
5524 #ifdef HAVE_conditional_move
5525       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5526          conditional move.  */
5527       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5528                                normalizep, target_mode);
5529       if (tem == 0)
5530         return 0;
5531
5532       if (and_them)
5533         tem = emit_conditional_move (target, code, op0, op1, mode,
5534                                      tem, const0_rtx, GET_MODE (tem), 0);
5535       else
5536         tem = emit_conditional_move (target, code, op0, op1, mode,
5537                                      trueval, tem, GET_MODE (tem), 0);
5538
5539       if (tem == 0)
5540         delete_insns_since (last);
5541       return tem;
5542 #else
5543       return 0;
5544 #endif
5545     }
5546
5547   /* The remaining tricks only apply to integer comparisons.  */
5548
5549   if (GET_MODE_CLASS (mode) != MODE_INT)
5550     return 0;
5551
5552   /* If this is an equality comparison of integers, we can try to exclusive-or
5553      (or subtract) the two operands and use a recursive call to try the
5554      comparison with zero.  Don't do any of these cases if branches are
5555      very cheap.  */
5556
5557   if ((code == EQ || code == NE) && op1 != const0_rtx)
5558     {
5559       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5560                           OPTAB_WIDEN);
5561
5562       if (tem == 0)
5563         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5564                             OPTAB_WIDEN);
5565       if (tem != 0)
5566         tem = emit_store_flag (target, code, tem, const0_rtx,
5567                                mode, unsignedp, normalizep);
5568       if (tem != 0)
5569         return tem;
5570
5571       delete_insns_since (last);
5572     }
5573
5574   /* For integer comparisons, try the reverse comparison.  However, for
5575      small X and if we'd have anyway to extend, implementing "X != 0"
5576      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5577   rcode = reverse_condition (code);
5578   if (can_compare_p (rcode, mode, ccp_store_flag)
5579       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5580             && code == NE
5581             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5582             && op1 == const0_rtx))
5583     {
5584       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5585                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5586
5587       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5588       if (want_add
5589           && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5590                        optimize_insn_for_speed_p ()) == 0)
5591         {
5592           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5593                                    STORE_FLAG_VALUE, target_mode);
5594           if (tem != 0)
5595             tem = expand_binop (target_mode, add_optab, tem,
5596                                 gen_int_mode (normalizep, target_mode),
5597                                 target, 0, OPTAB_WIDEN);
5598         }
5599       else if (!want_add
5600                && rtx_cost (trueval, XOR, 1,
5601                             optimize_insn_for_speed_p ()) == 0)
5602         {
5603           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5604                                    normalizep, target_mode);
5605           if (tem != 0)
5606             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5607                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5608         }
5609
5610       if (tem != 0)
5611         return tem;
5612       delete_insns_since (last);
5613     }
5614
5615   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5616      the constant zero.  Reject all other comparisons at this point.  Only
5617      do LE and GT if branches are expensive since they are expensive on
5618      2-operand machines.  */
5619
5620   if (op1 != const0_rtx
5621       || (code != EQ && code != NE
5622           && (BRANCH_COST (optimize_insn_for_speed_p (),
5623                            false) <= 1 || (code != LE && code != GT))))
5624     return 0;
5625
5626   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5627      do the necessary operation below.  */
5628
5629   tem = 0;
5630
5631   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5632      the sign bit set.  */
5633
5634   if (code == LE)
5635     {
5636       /* This is destructive, so SUBTARGET can't be OP0.  */
5637       if (rtx_equal_p (subtarget, op0))
5638         subtarget = 0;
5639
5640       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5641                           OPTAB_WIDEN);
5642       if (tem)
5643         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5644                             OPTAB_WIDEN);
5645     }
5646
5647   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5648      number of bits in the mode of OP0, minus one.  */
5649
5650   if (code == GT)
5651     {
5652       if (rtx_equal_p (subtarget, op0))
5653         subtarget = 0;
5654
5655       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5656                           GET_MODE_BITSIZE (mode) - 1,
5657                           subtarget, 0);
5658       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5659                           OPTAB_WIDEN);
5660     }
5661
5662   if (code == EQ || code == NE)
5663     {
5664       /* For EQ or NE, one way to do the comparison is to apply an operation
5665          that converts the operand into a positive number if it is nonzero
5666          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5667          for NE we negate.  This puts the result in the sign bit.  Then we
5668          normalize with a shift, if needed.
5669
5670          Two operations that can do the above actions are ABS and FFS, so try
5671          them.  If that doesn't work, and MODE is smaller than a full word,
5672          we can use zero-extension to the wider mode (an unsigned conversion)
5673          as the operation.  */
5674
5675       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5676          that is compensated by the subsequent overflow when subtracting
5677          one / negating.  */
5678
5679       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5680         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5681       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5682         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5683       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5684         {
5685           tem = convert_modes (word_mode, mode, op0, 1);
5686           mode = word_mode;
5687         }
5688
5689       if (tem != 0)
5690         {
5691           if (code == EQ)
5692             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5693                                 0, OPTAB_WIDEN);
5694           else
5695             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5696         }
5697
5698       /* If we couldn't do it that way, for NE we can "or" the two's complement
5699          of the value with itself.  For EQ, we take the one's complement of
5700          that "or", which is an extra insn, so we only handle EQ if branches
5701          are expensive.  */
5702
5703       if (tem == 0
5704           && (code == NE
5705               || BRANCH_COST (optimize_insn_for_speed_p (),
5706                               false) > 1))
5707         {
5708           if (rtx_equal_p (subtarget, op0))
5709             subtarget = 0;
5710
5711           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5712           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5713                               OPTAB_WIDEN);
5714
5715           if (tem && code == EQ)
5716             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5717         }
5718     }
5719
5720   if (tem && normalizep)
5721     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5722                         GET_MODE_BITSIZE (mode) - 1,
5723                         subtarget, normalizep == 1);
5724
5725   if (tem)
5726     {
5727       if (!target)
5728         ;
5729       else if (GET_MODE (tem) != target_mode)
5730         {
5731           convert_move (target, tem, 0);
5732           tem = target;
5733         }
5734       else if (!subtarget)
5735         {
5736           emit_move_insn (target, tem);
5737           tem = target;
5738         }
5739     }
5740   else
5741     delete_insns_since (last);
5742
5743   return tem;
5744 }
5745
5746 /* Like emit_store_flag, but always succeeds.  */
5747
5748 rtx
5749 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5750                        machine_mode mode, int unsignedp, int normalizep)
5751 {
5752   rtx tem;
5753   rtx_code_label *label;
5754   rtx trueval, falseval;
5755
5756   /* First see if emit_store_flag can do the job.  */
5757   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5758   if (tem != 0)
5759     return tem;
5760
5761   if (!target)
5762     target = gen_reg_rtx (word_mode);
5763
5764   /* If this failed, we have to do this with set/compare/jump/set code.
5765      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5766   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5767   if (code == NE
5768       && GET_MODE_CLASS (mode) == MODE_INT
5769       && REG_P (target)
5770       && op0 == target
5771       && op1 == const0_rtx)
5772     {
5773       label = gen_label_rtx ();
5774       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5775                                mode, NULL_RTX, NULL_RTX, label, -1);
5776       emit_move_insn (target, trueval);
5777       emit_label (label);
5778       return target;
5779     }
5780
5781   if (!REG_P (target)
5782       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5783     target = gen_reg_rtx (GET_MODE (target));
5784
5785   /* Jump in the right direction if the target cannot implement CODE
5786      but can jump on its reverse condition.  */
5787   falseval = const0_rtx;
5788   if (! can_compare_p (code, mode, ccp_jump)
5789       && (! FLOAT_MODE_P (mode)
5790           || code == ORDERED || code == UNORDERED
5791           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5792           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5793     {
5794       enum rtx_code rcode;
5795       if (FLOAT_MODE_P (mode))
5796         rcode = reverse_condition_maybe_unordered (code);
5797       else
5798         rcode = reverse_condition (code);
5799
5800       /* Canonicalize to UNORDERED for the libcall.  */
5801       if (can_compare_p (rcode, mode, ccp_jump)
5802           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5803         {
5804           falseval = trueval;
5805           trueval = const0_rtx;
5806           code = rcode;
5807         }
5808     }
5809
5810   emit_move_insn (target, trueval);
5811   label = gen_label_rtx ();
5812   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5813                            NULL_RTX, label, -1);
5814
5815   emit_move_insn (target, falseval);
5816   emit_label (label);
5817
5818   return target;
5819 }
5820 \f
5821 /* Perform possibly multi-word comparison and conditional jump to LABEL
5822    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5823    now a thin wrapper around do_compare_rtx_and_jump.  */
5824
5825 static void
5826 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5827                  rtx_code_label *label)
5828 {
5829   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5830   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5831                            NULL_RTX, NULL_RTX, label, -1);
5832 }