1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2015 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "diagnostic-core.h"
27 #include "rtl.h"
28 #include "input.h"
29 #include "alias.h"
30 #include "symtab.h"
31 #include "tree.h"
32 #include "fold-const.h"
33 #include "stor-layout.h"
34 #include "tm_p.h"
35 #include "flags.h"
36 #include "insn-config.h"
37 #include "hard-reg-set.h"
38 #include "function.h"
39 #include "expmed.h"
40 #include "dojump.h"
41 #include "explow.h"
42 #include "calls.h"
43 #include "emit-rtl.h"
44 #include "varasm.h"
45 #include "stmt.h"
46 #include "expr.h"
47 #include "insn-codes.h"
48 #include "optabs.h"
49 #include "recog.h"
50 #include "langhooks.h"
51 #include "predict.h"
52 #include "basic-block.h"
53 #include "df.h"
54 #include "target.h"
56 struct target_expmed default_target_expmed;
57 #if SWITCHABLE_TARGET
58 struct target_expmed *this_target_expmed = &default_target_expmed;
59 #endif
61 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT,
63 unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT,
65 rtx);
66 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
67 unsigned HOST_WIDE_INT,
68 rtx);
69 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
70 unsigned HOST_WIDE_INT,
71 unsigned HOST_WIDE_INT,
72 unsigned HOST_WIDE_INT,
73 rtx);
74 static rtx extract_fixed_bit_field (machine_mode, rtx,
75 unsigned HOST_WIDE_INT,
76 unsigned HOST_WIDE_INT, rtx, int);
77 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
78 unsigned HOST_WIDE_INT,
79 unsigned HOST_WIDE_INT, rtx, int);
80 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
81 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
82 unsigned HOST_WIDE_INT, int);
83 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
84 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
85 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
87 /* Return a constant integer mask value of mode MODE with BITSIZE ones
88 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
89 The mask is truncated if necessary to the width of mode MODE. The
90 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
92 static inline rtx
93 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
95 return immed_wide_int_const
96 (wi::shifted_mask (bitpos, bitsize, complement,
97 GET_MODE_PRECISION (mode)), mode);
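/* For example (illustrative values only): with a 32-bit MODE,
   mask_rtx (mode, 8, 4, false) yields the constant 0xf00 (four ones
   starting at bit 8), and mask_rtx (mode, 8, 4, true) yields its
   complement 0xfffff0ff.  */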
/* Test whether a value is zero or a power of two. */
101 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
102 (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
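/* This relies on the usual bit trick: subtracting 1 from a power of two
   clears its single set bit and sets every bit below it, so
   x & (x - 1) is zero exactly when x is zero or has one bit set.
   E.g. 8 & 7 == 0, while 6 & 5 == 4 != 0.  */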
104 struct init_expmed_rtl
106 rtx reg;
107 rtx plus;
108 rtx neg;
109 rtx mult;
110 rtx sdiv;
111 rtx udiv;
112 rtx sdiv_32;
113 rtx smod_32;
114 rtx wide_mult;
115 rtx wide_lshr;
116 rtx wide_trunc;
117 rtx shift;
118 rtx shift_mult;
119 rtx shift_add;
120 rtx shift_sub0;
121 rtx shift_sub1;
122 rtx zext;
123 rtx trunc;
125 rtx pow2[MAX_BITS_PER_WORD];
126 rtx cint[MAX_BITS_PER_WORD];
129 static void
130 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
131 machine_mode from_mode, bool speed)
133 int to_size, from_size;
134 rtx which;
136 to_size = GET_MODE_PRECISION (to_mode);
137 from_size = GET_MODE_PRECISION (from_mode);
/* Most partial integers have a precision less than the "full"
   integer they require for storage.  In case one doesn't, for
   comparison purposes here, reduce the bit size by one.  */
143 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
144 && exact_log2 (to_size) != -1)
145 to_size --;
146 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
147 && exact_log2 (from_size) != -1)
148 from_size --;
150 /* Assume cost of zero-extend and sign-extend is the same. */
151 which = (to_size < from_size ? all->trunc : all->zext);
153 PUT_MODE (all->reg, from_mode);
154 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
157 static void
158 init_expmed_one_mode (struct init_expmed_rtl *all,
159 machine_mode mode, int speed)
161 int m, n, mode_bitsize;
162 machine_mode mode_from;
164 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
166 PUT_MODE (all->reg, mode);
167 PUT_MODE (all->plus, mode);
168 PUT_MODE (all->neg, mode);
169 PUT_MODE (all->mult, mode);
170 PUT_MODE (all->sdiv, mode);
171 PUT_MODE (all->udiv, mode);
172 PUT_MODE (all->sdiv_32, mode);
173 PUT_MODE (all->smod_32, mode);
174 PUT_MODE (all->wide_trunc, mode);
175 PUT_MODE (all->shift, mode);
176 PUT_MODE (all->shift_mult, mode);
177 PUT_MODE (all->shift_add, mode);
178 PUT_MODE (all->shift_sub0, mode);
179 PUT_MODE (all->shift_sub1, mode);
180 PUT_MODE (all->zext, mode);
181 PUT_MODE (all->trunc, mode);
183 set_add_cost (speed, mode, set_src_cost (all->plus, speed));
184 set_neg_cost (speed, mode, set_src_cost (all->neg, speed));
185 set_mul_cost (speed, mode, set_src_cost (all->mult, speed));
186 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, speed));
187 set_udiv_cost (speed, mode, set_src_cost (all->udiv, speed));
189 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, speed)
190 <= 2 * add_cost (speed, mode)));
191 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, speed)
192 <= 4 * add_cost (speed, mode)));
194 set_shift_cost (speed, mode, 0, 0);
196 int cost = add_cost (speed, mode);
197 set_shiftadd_cost (speed, mode, 0, cost);
198 set_shiftsub0_cost (speed, mode, 0, cost);
199 set_shiftsub1_cost (speed, mode, 0, cost);
202 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
203 for (m = 1; m < n; m++)
205 XEXP (all->shift, 1) = all->cint[m];
206 XEXP (all->shift_mult, 1) = all->pow2[m];
208 set_shift_cost (speed, mode, m, set_src_cost (all->shift, speed));
209 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, speed));
210 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, speed));
211 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, speed));
214 if (SCALAR_INT_MODE_P (mode))
216 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
217 mode_from = (machine_mode)(mode_from + 1))
218 init_expmed_one_conv (all, mode, mode_from, speed);
220 if (GET_MODE_CLASS (mode) == MODE_INT)
222 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
223 if (wider_mode != VOIDmode)
225 PUT_MODE (all->zext, wider_mode);
226 PUT_MODE (all->wide_mult, wider_mode);
227 PUT_MODE (all->wide_lshr, wider_mode);
228 XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
230 set_mul_widen_cost (speed, wider_mode,
231 set_src_cost (all->wide_mult, speed));
232 set_mul_highpart_cost (speed, mode,
233 set_src_cost (all->wide_trunc, speed));
238 void
239 init_expmed (void)
241 struct init_expmed_rtl all;
242 machine_mode mode = QImode;
243 int m, speed;
245 memset (&all, 0, sizeof all);
246 for (m = 1; m < MAX_BITS_PER_WORD; m++)
248 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
249 all.cint[m] = GEN_INT (m);
252 /* Avoid using hard regs in ways which may be unsupported. */
253 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
254 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
255 all.neg = gen_rtx_NEG (mode, all.reg);
256 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
257 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
258 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
259 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
260 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
261 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
262 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
263 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
264 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
265 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
266 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
267 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
268 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
269 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
270 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
272 for (speed = 0; speed < 2; speed++)
274 crtl->maybe_hot_insn_p = speed;
275 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
277 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
278 mode = (machine_mode)(mode + 1))
279 init_expmed_one_mode (&all, mode, speed);
281 if (MIN_MODE_PARTIAL_INT != VOIDmode)
282 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
283 mode = (machine_mode)(mode + 1))
284 init_expmed_one_mode (&all, mode, speed);
286 if (MIN_MODE_VECTOR_INT != VOIDmode)
287 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
288 mode = (machine_mode)(mode + 1))
289 init_expmed_one_mode (&all, mode, speed);
292 if (alg_hash_used_p ())
294 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
295 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
297 else
298 set_alg_hash_used_p (true);
299 default_rtl_profile ();
301 ggc_free (all.trunc);
302 ggc_free (all.shift_sub1);
303 ggc_free (all.shift_sub0);
304 ggc_free (all.shift_add);
305 ggc_free (all.shift_mult);
306 ggc_free (all.shift);
307 ggc_free (all.wide_trunc);
308 ggc_free (all.wide_lshr);
309 ggc_free (all.wide_mult);
310 ggc_free (all.zext);
311 ggc_free (all.smod_32);
312 ggc_free (all.sdiv_32);
313 ggc_free (all.udiv);
314 ggc_free (all.sdiv);
315 ggc_free (all.mult);
316 ggc_free (all.neg);
317 ggc_free (all.plus);
318 ggc_free (all.reg);
321 /* Return an rtx representing minus the value of X.
322 MODE is the intended mode of the result,
323 useful if X is a CONST_INT. */
326 negate_rtx (machine_mode mode, rtx x)
328 rtx result = simplify_unary_operation (NEG, mode, x, mode);
330 if (result == 0)
331 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
333 return result;
336 /* Adjust bitfield memory MEM so that it points to the first unit of mode
337 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
338 If MODE is BLKmode, return a reference to every byte in the bitfield.
339 Set *NEW_BITNUM to the bit position of the field within the new memory. */
341 static rtx
342 narrow_bit_field_mem (rtx mem, machine_mode mode,
343 unsigned HOST_WIDE_INT bitsize,
344 unsigned HOST_WIDE_INT bitnum,
345 unsigned HOST_WIDE_INT *new_bitnum)
347 if (mode == BLKmode)
349 *new_bitnum = bitnum % BITS_PER_UNIT;
350 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
351 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
352 / BITS_PER_UNIT);
353 return adjust_bitfield_address_size (mem, mode, offset, size);
355 else
357 unsigned int unit = GET_MODE_BITSIZE (mode);
358 *new_bitnum = bitnum % unit;
359 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
360 return adjust_bitfield_address (mem, mode, offset);
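/* For instance (illustrative numbers only), narrowing to a 32-bit
   integer mode with BITNUM == 37 gives *NEW_BITNUM == 5 and advances
   the address by (37 - 5) / BITS_PER_UNIT == 4 bytes, assuming
   8-bit units.  */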
364 /* The caller wants to perform insertion or extraction PATTERN on a
365 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
366 BITREGION_START and BITREGION_END are as for store_bit_field
367 and FIELDMODE is the natural mode of the field.
369 Search for a mode that is compatible with the memory access
370 restrictions and (where applicable) with a register insertion or
371 extraction. Return the new memory on success, storing the adjusted
372 bit position in *NEW_BITNUM. Return null otherwise. */
374 static rtx
375 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
376 rtx op0, HOST_WIDE_INT bitsize,
377 HOST_WIDE_INT bitnum,
378 unsigned HOST_WIDE_INT bitregion_start,
379 unsigned HOST_WIDE_INT bitregion_end,
380 machine_mode fieldmode,
381 unsigned HOST_WIDE_INT *new_bitnum)
383 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
384 bitregion_end, MEM_ALIGN (op0),
385 MEM_VOLATILE_P (op0));
386 machine_mode best_mode;
387 if (iter.next_mode (&best_mode))
389 /* We can use a memory in BEST_MODE. See whether this is true for
390 any wider modes. All other things being equal, we prefer to
391 use the widest mode possible because it tends to expose more
392 CSE opportunities. */
393 if (!iter.prefer_smaller_modes ())
395 /* Limit the search to the mode required by the corresponding
396 register insertion or extraction instruction, if any. */
397 machine_mode limit_mode = word_mode;
398 extraction_insn insn;
399 if (get_best_reg_extraction_insn (&insn, pattern,
400 GET_MODE_BITSIZE (best_mode),
401 fieldmode))
402 limit_mode = insn.field_mode;
404 machine_mode wider_mode;
405 while (iter.next_mode (&wider_mode)
406 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
407 best_mode = wider_mode;
409 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
410 new_bitnum);
412 return NULL_RTX;
415 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
416 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
417 offset is then BITNUM / BITS_PER_UNIT. */
419 static bool
420 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
421 unsigned HOST_WIDE_INT bitsize,
422 machine_mode struct_mode)
424 if (BYTES_BIG_ENDIAN)
425 return (bitnum % BITS_PER_UNIT == 0
426 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
427 || (bitnum + bitsize) % BITS_PER_WORD == 0));
428 else
429 return bitnum % BITS_PER_WORD == 0;
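/* As an illustration, with 32-bit words and a 32-bit STRUCT_MODE:
   on a little-endian target bits [0, 16) form a lowpart, since the
   field starts at bit 0 of a word; on a big-endian target bits
   [16, 32) do, since the field ends at the end of the structure.  */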
432 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
433 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
434 Return false if the access would touch memory outside the range
435 BITREGION_START to BITREGION_END for conformance to the C++ memory
436 model. */
438 static bool
439 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
440 unsigned HOST_WIDE_INT bitnum,
441 machine_mode fieldmode,
442 unsigned HOST_WIDE_INT bitregion_start,
443 unsigned HOST_WIDE_INT bitregion_end)
445 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
447 /* -fstrict-volatile-bitfields must be enabled and we must have a
448 volatile MEM. */
449 if (!MEM_P (op0)
450 || !MEM_VOLATILE_P (op0)
451 || flag_strict_volatile_bitfields <= 0)
452 return false;
454 /* Non-integral modes likely only happen with packed structures.
455 Punt. */
456 if (!SCALAR_INT_MODE_P (fieldmode))
457 return false;
459 /* The bit size must not be larger than the field mode, and
460 the field mode must not be larger than a word. */
461 if (bitsize > modesize || modesize > BITS_PER_WORD)
462 return false;
464 /* Check for cases of unaligned fields that must be split. */
465 if (bitnum % modesize + bitsize > modesize)
466 return false;
468 /* The memory must be sufficiently aligned for a MODESIZE access.
This condition guarantees that the memory access will not
470 touch anything after the end of the structure. */
471 if (MEM_ALIGN (op0) < modesize)
472 return false;
474 /* Check for cases where the C++ memory model applies. */
475 if (bitregion_end != 0
476 && (bitnum - bitnum % modesize < bitregion_start
477 || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
478 return false;
480 return true;
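/* A minimal sketch of the kind of access this covers (hypothetical
   user code):

     struct S { volatile unsigned int f : 8; };

   With -fstrict-volatile-bitfields, reads and writes of s.f are done
   with a single access in the declared type's mode (typically SImode
   here) whenever the checks above succeed.  */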
483 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
484 bit number BITNUM can be treated as a simple value of mode MODE. */
486 static bool
487 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
488 unsigned HOST_WIDE_INT bitnum, machine_mode mode)
490 return (MEM_P (op0)
491 && bitnum % BITS_PER_UNIT == 0
492 && bitsize == GET_MODE_BITSIZE (mode)
493 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
494 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
495 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
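/* E.g. a 32-bit field starting at bit 64 of a suitably aligned MEM
   satisfies this and can be accessed with one SImode load or store at
   byte offset 8 (illustrative, assuming 32-bit SImode and 8-bit
   units).  */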
498 /* Try to use instruction INSV to store VALUE into a field of OP0.
499 BITSIZE and BITNUM are as for store_bit_field. */
501 static bool
502 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
503 unsigned HOST_WIDE_INT bitsize,
504 unsigned HOST_WIDE_INT bitnum,
505 rtx value)
507 struct expand_operand ops[4];
508 rtx value1;
509 rtx xop0 = op0;
510 rtx_insn *last = get_last_insn ();
511 bool copy_back = false;
513 machine_mode op_mode = insv->field_mode;
514 unsigned int unit = GET_MODE_BITSIZE (op_mode);
515 if (bitsize == 0 || bitsize > unit)
516 return false;
518 if (MEM_P (xop0))
519 /* Get a reference to the first byte of the field. */
520 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
521 &bitnum);
522 else
524 /* Convert from counting within OP0 to counting in OP_MODE. */
525 if (BYTES_BIG_ENDIAN)
526 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
528 /* If xop0 is a register, we need it in OP_MODE
529 to make it acceptable to the format of insv. */
530 if (GET_CODE (xop0) == SUBREG)
531 /* We can't just change the mode, because this might clobber op0,
532 and we will need the original value of op0 if insv fails. */
533 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
534 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
535 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
538 /* If the destination is a paradoxical subreg such that we need a
539 truncate to the inner mode, perform the insertion on a temporary and
540 truncate the result to the original destination. Note that we can't
541 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
542 X) 0)) is (reg:N X). */
543 if (GET_CODE (xop0) == SUBREG
544 && REG_P (SUBREG_REG (xop0))
545 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
546 op_mode))
548 rtx tem = gen_reg_rtx (op_mode);
549 emit_move_insn (tem, xop0);
550 xop0 = tem;
551 copy_back = true;
/* There is a similar overflow check at the start of store_bit_field_1,
   but it only handles the case where the field lies completely
   outside the register.  Here the field may also lie partially in
   the register, so adjust BITSIZE for that partial overlap.  Without
   this fix, pr48335-2.c would break on big-endian targets that have
   a bit-insert instruction, such as arm and aarch64.  */
561 if (bitsize + bitnum > unit && bitnum < unit)
563 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
564 "destination object, data truncated into %wu-bit",
565 bitsize, unit - bitnum);
566 bitsize = unit - bitnum;
569 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
570 "backwards" from the size of the unit we are inserting into.
571 Otherwise, we count bits from the most significant on a
572 BYTES/BITS_BIG_ENDIAN machine. */
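  /* Illustration: with UNIT == 32, BITSIZE == 8 and BITNUM == 4, the
     conversion below yields BITNUM == 32 - 8 - 4 == 20, i.e. the same
     field counted from the other end of the unit.  */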
574 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
575 bitnum = unit - bitsize - bitnum;
577 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
578 value1 = value;
579 if (GET_MODE (value) != op_mode)
581 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
583 /* Optimization: Don't bother really extending VALUE
584 if it has all the bits we will actually use. However,
585 if we must narrow it, be sure we do it correctly. */
587 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
589 rtx tmp;
591 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
592 if (! tmp)
593 tmp = simplify_gen_subreg (op_mode,
594 force_reg (GET_MODE (value),
595 value1),
596 GET_MODE (value), 0);
597 value1 = tmp;
599 else
600 value1 = gen_lowpart (op_mode, value1);
602 else if (CONST_INT_P (value))
603 value1 = gen_int_mode (INTVAL (value), op_mode);
604 else
605 /* Parse phase is supposed to make VALUE's data type
606 match that of the component reference, which is a type
607 at least as wide as the field; so VALUE should have
608 a mode that corresponds to that type. */
609 gcc_assert (CONSTANT_P (value));
612 create_fixed_operand (&ops[0], xop0);
613 create_integer_operand (&ops[1], bitsize);
614 create_integer_operand (&ops[2], bitnum);
615 create_input_operand (&ops[3], value1, op_mode);
616 if (maybe_expand_insn (insv->icode, 4, ops))
618 if (copy_back)
619 convert_move (op0, xop0, true);
620 return true;
622 delete_insns_since (last);
623 return false;
626 /* A subroutine of store_bit_field, with the same arguments. Return true
627 if the operation could be implemented.
629 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
630 no other way of implementing the operation. If FALLBACK_P is false,
631 return false instead. */
633 static bool
634 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
635 unsigned HOST_WIDE_INT bitnum,
636 unsigned HOST_WIDE_INT bitregion_start,
637 unsigned HOST_WIDE_INT bitregion_end,
638 machine_mode fieldmode,
639 rtx value, bool fallback_p)
641 rtx op0 = str_rtx;
642 rtx orig_value;
644 while (GET_CODE (op0) == SUBREG)
646 /* The following line once was done only if WORDS_BIG_ENDIAN,
647 but I think that is a mistake. WORDS_BIG_ENDIAN is
648 meaningful at a much higher level; when structures are copied
649 between memory and regs, the higher-numbered regs
650 always get higher addresses. */
651 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
652 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
653 int byte_offset = 0;
655 /* Paradoxical subregs need special handling on big endian machines. */
656 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
658 int difference = inner_mode_size - outer_mode_size;
660 if (WORDS_BIG_ENDIAN)
661 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
662 if (BYTES_BIG_ENDIAN)
663 byte_offset += difference % UNITS_PER_WORD;
665 else
666 byte_offset = SUBREG_BYTE (op0);
668 bitnum += byte_offset * BITS_PER_UNIT;
669 op0 = SUBREG_REG (op0);
672 /* No action is needed if the target is a register and if the field
673 lies completely outside that register. This can occur if the source
674 code contains an out-of-bounds access to a small array. */
675 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
676 return true;
678 /* Use vec_set patterns for inserting parts of vectors whenever
679 available. */
680 if (VECTOR_MODE_P (GET_MODE (op0))
681 && !MEM_P (op0)
682 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
683 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
684 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
685 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
687 struct expand_operand ops[3];
688 machine_mode outermode = GET_MODE (op0);
689 machine_mode innermode = GET_MODE_INNER (outermode);
690 enum insn_code icode = optab_handler (vec_set_optab, outermode);
691 int pos = bitnum / GET_MODE_BITSIZE (innermode);
693 create_fixed_operand (&ops[0], op0);
694 create_input_operand (&ops[1], value, innermode);
695 create_integer_operand (&ops[2], pos);
696 if (maybe_expand_insn (icode, 3, ops))
697 return true;
700 /* If the target is a register, overwriting the entire object, or storing
701 a full-word or multi-word field can be done with just a SUBREG. */
702 if (!MEM_P (op0)
703 && bitsize == GET_MODE_BITSIZE (fieldmode)
704 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
705 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
707 /* Use the subreg machinery either to narrow OP0 to the required
708 words or to cope with mode punning between equal-sized modes.
In the latter case, use a subreg on the rhs, not the lhs. */
710 rtx sub;
712 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
714 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
715 if (sub)
717 emit_move_insn (op0, sub);
718 return true;
721 else
723 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
724 bitnum / BITS_PER_UNIT);
725 if (sub)
727 emit_move_insn (sub, value);
728 return true;
733 /* If the target is memory, storing any naturally aligned field can be
734 done with a simple store. For targets that support fast unaligned
735 memory, any naturally sized, unit aligned field can be done directly. */
736 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
738 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
739 emit_move_insn (op0, value);
740 return true;
743 /* Make sure we are playing with integral modes. Pun with subregs
744 if we aren't. This must come after the entire register case above,
745 since that case is valid for any mode. The following cases are only
746 valid for integral modes. */
748 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
749 if (imode != GET_MODE (op0))
751 if (MEM_P (op0))
752 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
753 else
755 gcc_assert (imode != BLKmode);
756 op0 = gen_lowpart (imode, op0);
761 /* Storing an lsb-aligned field in a register
762 can be done with a movstrict instruction. */
764 if (!MEM_P (op0)
765 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
766 && bitsize == GET_MODE_BITSIZE (fieldmode)
767 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
769 struct expand_operand ops[2];
770 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
771 rtx arg0 = op0;
772 unsigned HOST_WIDE_INT subreg_off;
774 if (GET_CODE (arg0) == SUBREG)
776 /* Else we've got some float mode source being extracted into
777 a different float mode destination -- this combination of
778 subregs results in Severe Tire Damage. */
779 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
780 || GET_MODE_CLASS (fieldmode) == MODE_INT
781 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
782 arg0 = SUBREG_REG (arg0);
785 subreg_off = bitnum / BITS_PER_UNIT;
786 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
788 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
790 create_fixed_operand (&ops[0], arg0);
791 /* Shrink the source operand to FIELDMODE. */
792 create_convert_operand_to (&ops[1], value, fieldmode, false);
793 if (maybe_expand_insn (icode, 2, ops))
794 return true;
798 /* Handle fields bigger than a word. */
800 if (bitsize > BITS_PER_WORD)
802 /* Here we transfer the words of the field
803 in the order least significant first.
804 This is because the most significant word is the one which may
805 be less than full.
806 However, only do that if the value is not BLKmode. */
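      /* For example (illustrative sizes only), storing a 40-bit field
	 with 32-bit words takes two iterations: word 0 carries the 32
	 low-order bits and word 1 the remaining 8, so only the most
	 significant word is partial.  */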
808 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
809 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
810 unsigned int i;
811 rtx_insn *last;
813 /* This is the mode we must force value to, so that there will be enough
814 subwords to extract. Note that fieldmode will often (always?) be
815 VOIDmode, because that is what store_field uses to indicate that this
816 is a bit field, but passing VOIDmode to operand_subword_force
817 is not allowed. */
818 fieldmode = GET_MODE (value);
819 if (fieldmode == VOIDmode)
820 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
822 last = get_last_insn ();
823 for (i = 0; i < nwords; i++)
825 /* If I is 0, use the low-order word in both field and target;
826 if I is 1, use the next to lowest word; and so on. */
827 unsigned int wordnum = (backwards
828 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
829 - i - 1
830 : i);
831 unsigned int bit_offset = (backwards
832 ? MAX ((int) bitsize - ((int) i + 1)
833 * BITS_PER_WORD,
835 : (int) i * BITS_PER_WORD);
836 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
837 unsigned HOST_WIDE_INT new_bitsize =
838 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
/* If the remaining chunk doesn't have a full word's worth of bits,
   make sure that the higher-order bits are used on big-endian
   machines.  */
843 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
844 value_word = simplify_expand_binop (word_mode, lshr_optab,
845 value_word,
846 GEN_INT (BITS_PER_WORD
847 - new_bitsize),
848 NULL_RTX, true,
849 OPTAB_LIB_WIDEN);
851 if (!store_bit_field_1 (op0, new_bitsize,
852 bitnum + bit_offset,
853 bitregion_start, bitregion_end,
854 word_mode,
855 value_word, fallback_p))
857 delete_insns_since (last);
858 return false;
861 return true;
864 /* If VALUE has a floating-point or complex mode, access it as an
865 integer of the corresponding size. This can occur on a machine
866 with 64 bit registers that uses SFmode for float. It can also
867 occur for unaligned float or complex fields. */
868 orig_value = value;
869 if (GET_MODE (value) != VOIDmode
870 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
871 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
873 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
874 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
877 /* If OP0 is a multi-word register, narrow it to the affected word.
878 If the region spans two words, defer to store_split_bit_field. */
879 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
881 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
882 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
883 gcc_assert (op0);
884 bitnum %= BITS_PER_WORD;
885 if (bitnum + bitsize > BITS_PER_WORD)
887 if (!fallback_p)
888 return false;
890 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
891 bitregion_end, value);
892 return true;
896 /* From here on we can assume that the field to be stored in fits
897 within a word. If the destination is a register, it too fits
898 in a word. */
900 extraction_insn insv;
901 if (!MEM_P (op0)
902 && get_best_reg_extraction_insn (&insv, EP_insv,
903 GET_MODE_BITSIZE (GET_MODE (op0)),
904 fieldmode)
905 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
906 return true;
908 /* If OP0 is a memory, try copying it to a register and seeing if a
909 cheap register alternative is available. */
910 if (MEM_P (op0))
912 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
913 fieldmode)
914 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
915 return true;
917 rtx_insn *last = get_last_insn ();
919 /* Try loading part of OP0 into a register, inserting the bitfield
920 into that, and then copying the result back to OP0. */
921 unsigned HOST_WIDE_INT bitpos;
922 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
923 bitregion_start, bitregion_end,
924 fieldmode, &bitpos);
925 if (xop0)
927 rtx tempreg = copy_to_reg (xop0);
928 if (store_bit_field_1 (tempreg, bitsize, bitpos,
929 bitregion_start, bitregion_end,
930 fieldmode, orig_value, false))
932 emit_move_insn (xop0, tempreg);
933 return true;
935 delete_insns_since (last);
939 if (!fallback_p)
940 return false;
942 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
943 bitregion_end, value);
944 return true;
947 /* Generate code to store value from rtx VALUE
948 into a bit-field within structure STR_RTX
949 containing BITSIZE bits starting at bit BITNUM.
951 BITREGION_START is bitpos of the first bitfield in this region.
952 BITREGION_END is the bitpos of the ending bitfield in this region.
953 These two fields are 0, if the C++ memory model does not apply,
954 or we are not interested in keeping track of bitfield regions.
956 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
958 void
959 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
960 unsigned HOST_WIDE_INT bitnum,
961 unsigned HOST_WIDE_INT bitregion_start,
962 unsigned HOST_WIDE_INT bitregion_end,
963 machine_mode fieldmode,
964 rtx value)
966 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
967 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
968 bitregion_start, bitregion_end))
970 /* Storing of a full word can be done with a simple store.
971 We know here that the field can be accessed with one single
972 instruction. For targets that support unaligned memory,
973 an unaligned access may be necessary. */
974 if (bitsize == GET_MODE_BITSIZE (fieldmode))
976 str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
977 bitnum / BITS_PER_UNIT);
978 gcc_assert (bitnum % BITS_PER_UNIT == 0);
979 emit_move_insn (str_rtx, value);
981 else
983 rtx temp;
985 str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
986 &bitnum);
987 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
988 temp = copy_to_reg (str_rtx);
989 if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
990 fieldmode, value, true))
991 gcc_unreachable ();
993 emit_move_insn (str_rtx, temp);
996 return;
999 /* Under the C++0x memory model, we must not touch bits outside the
1000 bit region. Adjust the address to start at the beginning of the
1001 bit region. */
1002 if (MEM_P (str_rtx) && bitregion_start > 0)
1004 machine_mode bestmode;
1005 HOST_WIDE_INT offset, size;
1007 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1009 offset = bitregion_start / BITS_PER_UNIT;
1010 bitnum -= bitregion_start;
1011 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1012 bitregion_end -= bitregion_start;
1013 bitregion_start = 0;
1014 bestmode = get_best_mode (bitsize, bitnum,
1015 bitregion_start, bitregion_end,
1016 MEM_ALIGN (str_rtx), VOIDmode,
1017 MEM_VOLATILE_P (str_rtx));
1018 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1021 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1022 bitregion_start, bitregion_end,
1023 fieldmode, value, true))
1024 gcc_unreachable ();
1027 /* Use shifts and boolean operations to store VALUE into a bit field of
1028 width BITSIZE in OP0, starting at bit BITNUM. */
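/* In essence this computes, in MODE arithmetic (a sketch that ignores
   the endianness adjustments and constant-VALUE shortcuts handled
   below):

     op0 = (op0 & ~(mask << bitnum)) | ((value & mask) << bitnum);

   where MASK has BITSIZE low-order ones.  */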
1030 static void
1031 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1032 unsigned HOST_WIDE_INT bitnum,
1033 unsigned HOST_WIDE_INT bitregion_start,
1034 unsigned HOST_WIDE_INT bitregion_end,
1035 rtx value)
1037 /* There is a case not handled here:
1038 a structure with a known alignment of just a halfword
1039 and a field split across two aligned halfwords within the structure.
1040 Or likewise a structure with a known alignment of just a byte
1041 and a field split across two bytes.
1042 Such cases are not supposed to be able to occur. */
1044 if (MEM_P (op0))
1046 machine_mode mode = GET_MODE (op0);
1047 if (GET_MODE_BITSIZE (mode) == 0
1048 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1049 mode = word_mode;
1050 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1051 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1053 if (mode == VOIDmode)
1055 /* The only way this should occur is if the field spans word
1056 boundaries. */
1057 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1058 bitregion_end, value);
1059 return;
1062 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1065 store_fixed_bit_field_1 (op0, bitsize, bitnum, value);
1068 /* Helper function for store_fixed_bit_field, stores
1069 the bit field always using the MODE of OP0. */
1071 static void
1072 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1073 unsigned HOST_WIDE_INT bitnum,
1074 rtx value)
1076 machine_mode mode;
1077 rtx temp;
1078 int all_zero = 0;
1079 int all_one = 0;
1081 mode = GET_MODE (op0);
1082 gcc_assert (SCALAR_INT_MODE_P (mode));
1084 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1085 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1087 if (BYTES_BIG_ENDIAN)
1088 /* BITNUM is the distance between our msb
1089 and that of the containing datum.
1090 Convert it to the distance from the lsb. */
1091 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1093 /* Now BITNUM is always the distance between our lsb
1094 and that of OP0. */
1096 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1097 we must first convert its mode to MODE. */
1099 if (CONST_INT_P (value))
1101 unsigned HOST_WIDE_INT v = UINTVAL (value);
1103 if (bitsize < HOST_BITS_PER_WIDE_INT)
1104 v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;
1106 if (v == 0)
1107 all_zero = 1;
1108 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1109 && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
1110 || (bitsize == HOST_BITS_PER_WIDE_INT
1111 && v == (unsigned HOST_WIDE_INT) -1))
1112 all_one = 1;
1114 value = lshift_value (mode, v, bitnum);
1116 else
1118 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1119 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1121 if (GET_MODE (value) != mode)
1122 value = convert_to_mode (mode, value, 1);
1124 if (must_and)
1125 value = expand_binop (mode, and_optab, value,
1126 mask_rtx (mode, 0, bitsize, 0),
1127 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1128 if (bitnum > 0)
1129 value = expand_shift (LSHIFT_EXPR, mode, value,
1130 bitnum, NULL_RTX, 1);
1133 /* Now clear the chosen bits in OP0,
1134 except that if VALUE is -1 we need not bother. */
1135 /* We keep the intermediates in registers to allow CSE to combine
1136 consecutive bitfield assignments. */
1138 temp = force_reg (mode, op0);
1140 if (! all_one)
1142 temp = expand_binop (mode, and_optab, temp,
1143 mask_rtx (mode, bitnum, bitsize, 1),
1144 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1145 temp = force_reg (mode, temp);
1148 /* Now logical-or VALUE into OP0, unless it is zero. */
1150 if (! all_zero)
1152 temp = expand_binop (mode, ior_optab, temp, value,
1153 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1154 temp = force_reg (mode, temp);
1157 if (op0 != temp)
1159 op0 = copy_rtx (op0);
1160 emit_move_insn (op0, temp);
1164 /* Store a bit field that is split across multiple accessible memory objects.
1166 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1167 BITSIZE is the field width; BITPOS the position of its first bit
1168 (within the word).
1169 VALUE is the value to store.
1171 This does not yet handle fields wider than BITS_PER_WORD. */
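/* A worked example (illustrative only): with 32-bit words, storing an
   8-bit field at BITPOS == 28 is done in two pieces; the first pass
   stores 4 bits at bit 28 of the first word, the second stores the
   remaining 4 bits at bit 0 of the next word.  */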
1173 static void
1174 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1175 unsigned HOST_WIDE_INT bitpos,
1176 unsigned HOST_WIDE_INT bitregion_start,
1177 unsigned HOST_WIDE_INT bitregion_end,
1178 rtx value)
1180 unsigned int unit;
1181 unsigned int bitsdone = 0;
1183 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1184 much at a time. */
1185 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1186 unit = BITS_PER_WORD;
1187 else
1188 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1190 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1191 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1192 again, and we will mutually recurse forever. */
1193 if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1194 unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1196 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1197 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1198 that VALUE might be a floating-point constant. */
1199 if (CONSTANT_P (value) && !CONST_INT_P (value))
1201 rtx word = gen_lowpart_common (word_mode, value);
1203 if (word && (value != word))
1204 value = word;
1205 else
1206 value = gen_lowpart_common (word_mode,
1207 force_reg (GET_MODE (value) != VOIDmode
1208 ? GET_MODE (value)
1209 : word_mode, value));
1212 while (bitsdone < bitsize)
1214 unsigned HOST_WIDE_INT thissize;
1215 rtx part, word;
1216 unsigned HOST_WIDE_INT thispos;
1217 unsigned HOST_WIDE_INT offset;
1219 offset = (bitpos + bitsdone) / unit;
1220 thispos = (bitpos + bitsdone) % unit;
/* When the region of bytes we can touch is restricted, decrease
1223 UNIT close to the end of the region as needed. If op0 is a REG
1224 or SUBREG of REG, don't do this, as there can't be data races
1225 on a register and we can expand shorter code in some cases. */
1226 if (bitregion_end
1227 && unit > BITS_PER_UNIT
1228 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1229 && !REG_P (op0)
1230 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1232 unit = unit / 2;
1233 continue;
1236 /* THISSIZE must not overrun a word boundary. Otherwise,
1237 store_fixed_bit_field will call us again, and we will mutually
1238 recurse forever. */
1239 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1240 thissize = MIN (thissize, unit - thispos);
1242 if (BYTES_BIG_ENDIAN)
1244 /* Fetch successively less significant portions. */
1245 if (CONST_INT_P (value))
1246 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1247 >> (bitsize - bitsdone - thissize))
1248 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1249 else
1251 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1252 /* The args are chosen so that the last part includes the
1253 lsb. Give extract_bit_field the value it needs (with
1254 endianness compensation) to fetch the piece we want. */
1255 part = extract_fixed_bit_field (word_mode, value, thissize,
1256 total_bits - bitsize + bitsdone,
1257 NULL_RTX, 1);
1260 else
1262 /* Fetch successively more significant portions. */
1263 if (CONST_INT_P (value))
1264 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1265 >> bitsdone)
1266 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1267 else
1268 part = extract_fixed_bit_field (word_mode, value, thissize,
1269 bitsdone, NULL_RTX, 1);
1272 /* If OP0 is a register, then handle OFFSET here.
1274 When handling multiword bitfields, extract_bit_field may pass
1275 down a word_mode SUBREG of a larger REG for a bitfield that actually
1276 crosses a word boundary. Thus, for a SUBREG, we must find
1277 the current word starting from the base register. */
1278 if (GET_CODE (op0) == SUBREG)
1280 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1281 + (offset * unit / BITS_PER_WORD);
1282 machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1283 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1284 word = word_offset ? const0_rtx : op0;
1285 else
1286 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1287 GET_MODE (SUBREG_REG (op0)));
1288 offset &= BITS_PER_WORD / unit - 1;
1290 else if (REG_P (op0))
1292 machine_mode op0_mode = GET_MODE (op0);
1293 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1294 word = offset ? const0_rtx : op0;
1295 else
1296 word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1297 GET_MODE (op0));
1298 offset &= BITS_PER_WORD / unit - 1;
1300 else
1301 word = op0;
1303 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1304 it is just an out-of-bounds access. Ignore it. */
1305 if (word != const0_rtx)
1306 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1307 bitregion_start, bitregion_end, part);
1308 bitsdone += thissize;
1312 /* A subroutine of extract_bit_field_1 that converts return value X
1313 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1314 to extract_bit_field. */
1316 static rtx
1317 convert_extracted_bit_field (rtx x, machine_mode mode,
1318 machine_mode tmode, bool unsignedp)
1320 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1321 return x;
1323 /* If the x mode is not a scalar integral, first convert to the
1324 integer mode of that size and then access it as a floating-point
1325 value via a SUBREG. */
1326 if (!SCALAR_INT_MODE_P (tmode))
1328 machine_mode smode;
1330 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1331 x = convert_to_mode (smode, x, unsignedp);
1332 x = force_reg (smode, x);
1333 return gen_lowpart (tmode, x);
1336 return convert_to_mode (tmode, x, unsignedp);
1339 /* Try to use an ext(z)v pattern to extract a field from OP0.
1340 Return the extracted value on success, otherwise return null.
1341 EXT_MODE is the mode of the extraction and the other arguments
1342 are as for extract_bit_field. */
1344 static rtx
1345 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1346 unsigned HOST_WIDE_INT bitsize,
1347 unsigned HOST_WIDE_INT bitnum,
1348 int unsignedp, rtx target,
1349 machine_mode mode, machine_mode tmode)
1351 struct expand_operand ops[4];
1352 rtx spec_target = target;
1353 rtx spec_target_subreg = 0;
1354 machine_mode ext_mode = extv->field_mode;
1355 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1357 if (bitsize == 0 || unit < bitsize)
1358 return NULL_RTX;
1360 if (MEM_P (op0))
1361 /* Get a reference to the first byte of the field. */
1362 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1363 &bitnum);
1364 else
1366 /* Convert from counting within OP0 to counting in EXT_MODE. */
1367 if (BYTES_BIG_ENDIAN)
1368 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1370 /* If op0 is a register, we need it in EXT_MODE to make it
1371 acceptable to the format of ext(z)v. */
1372 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1373 return NULL_RTX;
1374 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1375 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1378 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1379 "backwards" from the size of the unit we are extracting from.
1380 Otherwise, we count bits from the most significant on a
1381 BYTES/BITS_BIG_ENDIAN machine. */
1383 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1384 bitnum = unit - bitsize - bitnum;
1386 if (target == 0)
1387 target = spec_target = gen_reg_rtx (tmode);
1389 if (GET_MODE (target) != ext_mode)
1391 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1392 between the mode of the extraction (word_mode) and the target
1393 mode. Instead, create a temporary and use convert_move to set
1394 the target. */
1395 if (REG_P (target)
1396 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1398 target = gen_lowpart (ext_mode, target);
1399 if (GET_MODE_PRECISION (ext_mode)
1400 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1401 spec_target_subreg = target;
1403 else
1404 target = gen_reg_rtx (ext_mode);
1407 create_output_operand (&ops[0], target, ext_mode);
1408 create_fixed_operand (&ops[1], op0);
1409 create_integer_operand (&ops[2], bitsize);
1410 create_integer_operand (&ops[3], bitnum);
1411 if (maybe_expand_insn (extv->icode, 4, ops))
1413 target = ops[0].value;
1414 if (target == spec_target)
1415 return target;
1416 if (target == spec_target_subreg)
1417 return spec_target;
1418 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1420 return NULL_RTX;
1423 /* A subroutine of extract_bit_field, with the same arguments.
1424 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1425 if we can find no other means of implementing the operation.
If FALLBACK_P is false, return NULL instead. */
1428 static rtx
1429 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1430 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1431 machine_mode mode, machine_mode tmode,
1432 bool fallback_p)
1434 rtx op0 = str_rtx;
1435 machine_mode int_mode;
1436 machine_mode mode1;
1438 if (tmode == VOIDmode)
1439 tmode = mode;
1441 while (GET_CODE (op0) == SUBREG)
1443 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1444 op0 = SUBREG_REG (op0);
1447 /* If we have an out-of-bounds access to a register, just return an
1448 uninitialized register of the required mode. This can occur if the
1449 source code contains an out-of-bounds access to a small array. */
1450 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1451 return gen_reg_rtx (tmode);
1453 if (REG_P (op0)
1454 && mode == GET_MODE (op0)
1455 && bitnum == 0
1456 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1458 /* We're trying to extract a full register from itself. */
1459 return op0;
1462 /* See if we can get a better vector mode before extracting. */
1463 if (VECTOR_MODE_P (GET_MODE (op0))
1464 && !MEM_P (op0)
1465 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1467 machine_mode new_mode;
1469 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1470 new_mode = MIN_MODE_VECTOR_FLOAT;
1471 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1472 new_mode = MIN_MODE_VECTOR_FRACT;
1473 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1474 new_mode = MIN_MODE_VECTOR_UFRACT;
1475 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1476 new_mode = MIN_MODE_VECTOR_ACCUM;
1477 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1478 new_mode = MIN_MODE_VECTOR_UACCUM;
1479 else
1480 new_mode = MIN_MODE_VECTOR_INT;
1482 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1483 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1484 && targetm.vector_mode_supported_p (new_mode))
1485 break;
1486 if (new_mode != VOIDmode)
1487 op0 = gen_lowpart (new_mode, op0);
1490 /* Use vec_extract patterns for extracting parts of vectors whenever
1491 available. */
1492 if (VECTOR_MODE_P (GET_MODE (op0))
1493 && !MEM_P (op0)
1494 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1495 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1496 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1498 struct expand_operand ops[3];
1499 machine_mode outermode = GET_MODE (op0);
1500 machine_mode innermode = GET_MODE_INNER (outermode);
1501 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1502 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1504 create_output_operand (&ops[0], target, innermode);
1505 create_input_operand (&ops[1], op0, outermode);
1506 create_integer_operand (&ops[2], pos);
1507 if (maybe_expand_insn (icode, 3, ops))
1509 target = ops[0].value;
1510 if (GET_MODE (target) != mode)
1511 return gen_lowpart (tmode, target);
1512 return target;
1516 /* Make sure we are playing with integral modes. Pun with subregs
1517 if we aren't. */
1519 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1520 if (imode != GET_MODE (op0))
1522 if (MEM_P (op0))
1523 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1524 else if (imode != BLKmode)
1526 op0 = gen_lowpart (imode, op0);
1528 /* If we got a SUBREG, force it into a register since we
1529 aren't going to be able to do another SUBREG on it. */
1530 if (GET_CODE (op0) == SUBREG)
1531 op0 = force_reg (imode, op0);
1533 else if (REG_P (op0))
1535 rtx reg, subreg;
1536 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1537 MODE_INT);
1538 reg = gen_reg_rtx (imode);
1539 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1540 emit_move_insn (subreg, op0);
1541 op0 = reg;
1542 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1544 else
1546 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1547 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1548 emit_move_insn (mem, op0);
1549 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1554 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1555 If that's wrong, the solution is to test for it and set TARGET to 0
1556 if needed. */
1558 /* Get the mode of the field to use for atomic access or subreg
1559 conversion. */
1560 mode1 = mode;
1561 if (SCALAR_INT_MODE_P (tmode))
1563 machine_mode try_mode = mode_for_size (bitsize,
1564 GET_MODE_CLASS (tmode), 0);
1565 if (try_mode != BLKmode)
1566 mode1 = try_mode;
1568 gcc_assert (mode1 != BLKmode);
1570 /* Extraction of a full MODE1 value can be done with a subreg as long
1571 as the least significant bit of the value is the least significant
1572 bit of either OP0 or a word of OP0. */
1573 if (!MEM_P (op0)
1574 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1575 && bitsize == GET_MODE_BITSIZE (mode1)
1576 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1578 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1579 bitnum / BITS_PER_UNIT);
1580 if (sub)
1581 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1584 /* Extraction of a full MODE1 value can be done with a load as long as
1585 the field is on a byte boundary and is sufficiently aligned. */
1586 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1588 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1589 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1592 /* Handle fields bigger than a word. */
1594 if (bitsize > BITS_PER_WORD)
1596 /* Here we transfer the words of the field
1597 in the order least significant first.
1598 This is because the most significant word is the one which may
1599 be less than full. */
1601 unsigned int backwards = WORDS_BIG_ENDIAN;
1602 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1603 unsigned int i;
1604 rtx_insn *last;
1606 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1607 target = gen_reg_rtx (mode);
1609 /* In case we're about to clobber a base register or something
1610 (see gcc.c-torture/execute/20040625-1.c). */
1611 if (reg_mentioned_p (target, str_rtx))
1612 target = gen_reg_rtx (mode);
1614 /* Indicate for flow that the entire target reg is being set. */
1615 emit_clobber (target);
1617 last = get_last_insn ();
1618 for (i = 0; i < nwords; i++)
1620 /* If I is 0, use the low-order word in both field and target;
1621 if I is 1, use the next to lowest word; and so on. */
1622 /* Word number in TARGET to use. */
1623 unsigned int wordnum
1624 = (backwards
1625 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1626 : i);
1627 /* Offset from start of field in OP0. */
1628 unsigned int bit_offset = (backwards
1629 ? MAX ((int) bitsize - ((int) i + 1)
1630 * BITS_PER_WORD,
1632 : (int) i * BITS_PER_WORD);
1633 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1634 rtx result_part
1635 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1636 bitsize - i * BITS_PER_WORD),
1637 bitnum + bit_offset, 1, target_part,
1638 mode, word_mode, fallback_p);
1640 gcc_assert (target_part);
1641 if (!result_part)
1643 delete_insns_since (last);
1644 return NULL;
1647 if (result_part != target_part)
1648 emit_move_insn (target_part, result_part);
1651 if (unsignedp)
1653 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1654 need to be zero'd out. */
1655 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1657 unsigned int i, total_words;
1659 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1660 for (i = nwords; i < total_words; i++)
1661 emit_move_insn
1662 (operand_subword (target,
1663 backwards ? total_words - i - 1 : i,
1664 1, VOIDmode),
1665 const0_rtx);
1667 return target;
1670 /* Signed bit field: sign-extend with two arithmetic shifts. */
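      /* E.g. for a 5-bit field in a 32-bit MODE: shift left by 27 so
	 the field's sign bit becomes bit 31, then shift right
	 arithmetically by 27 to replicate it through the upper bits.  */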
1671 target = expand_shift (LSHIFT_EXPR, mode, target,
1672 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1673 return expand_shift (RSHIFT_EXPR, mode, target,
1674 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1677 /* If OP0 is a multi-word register, narrow it to the affected word.
1678 If the region spans two words, defer to extract_split_bit_field. */
1679 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1681 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1682 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1683 bitnum %= BITS_PER_WORD;
1684 if (bitnum + bitsize > BITS_PER_WORD)
1686 if (!fallback_p)
1687 return NULL_RTX;
1688 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1689 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1693 /* From here on we know the desired field is smaller than a word.
1694 If OP0 is a register, it too fits within a word. */
1695 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1696 extraction_insn extv;
1697 if (!MEM_P (op0)
1698 /* ??? We could limit the structure size to the part of OP0 that
1699 contains the field, with appropriate checks for endianness
1700 and TRULY_NOOP_TRUNCATION. */
1701 && get_best_reg_extraction_insn (&extv, pattern,
1702 GET_MODE_BITSIZE (GET_MODE (op0)),
1703 tmode))
1705 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1706 unsignedp, target, mode,
1707 tmode);
1708 if (result)
1709 return result;
1712 /* If OP0 is a memory, try copying it to a register and seeing if a
1713 cheap register alternative is available. */
1714 if (MEM_P (op0))
1716 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1717 tmode))
1719 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1720 bitnum, unsignedp,
1721 target, mode,
1722 tmode);
1723 if (result)
1724 return result;
1727 rtx_insn *last = get_last_insn ();
1729 /* Try loading part of OP0 into a register and extracting the
1730 bitfield from that. */
1731 unsigned HOST_WIDE_INT bitpos;
1732 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1733 0, 0, tmode, &bitpos);
1734 if (xop0)
1736 xop0 = copy_to_reg (xop0);
1737 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1738 unsignedp, target,
1739 mode, tmode, false);
1740 if (result)
1741 return result;
1742 delete_insns_since (last);
1746 if (!fallback_p)
1747 return NULL;
1749 /* Find a correspondingly-sized integer field, so we can apply
1750 shifts and masks to it. */
1751 int_mode = int_mode_for_mode (tmode);
1752 if (int_mode == BLKmode)
1753 int_mode = int_mode_for_mode (mode);
1754 /* Should probably push op0 out to memory and then do a load. */
1755 gcc_assert (int_mode != BLKmode);
1757 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1758 target, unsignedp);
1759 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1762 /* Generate code to extract a bit-field from STR_RTX
1763 containing BITSIZE bits, starting at BITNUM,
1764 and put it in TARGET if possible (if TARGET is nonzero).
1765 Regardless of TARGET, we return the rtx for where the value is placed.
1767 STR_RTX is the structure containing the byte (a REG or MEM).
1768 UNSIGNEDP is nonzero if this is an unsigned bit field.
1769 MODE is the natural mode of the field value once extracted.
1770 TMODE is the mode the caller would like the value to have;
1771 but the value may be returned with type MODE instead.
1773 If a TARGET is specified and we can store in it at no extra cost,
1774 we do so, and return TARGET.
1775 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1776 if they are equally easy. */
1778 rtx
1779 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1780 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1781 machine_mode mode, machine_mode tmode)
1783 machine_mode mode1;
1785 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1786 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1787 mode1 = GET_MODE (str_rtx);
1788 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1789 mode1 = GET_MODE (target);
1790 else
1791 mode1 = tmode;
1793 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1795 /* Extraction of a full MODE1 value can be done with a simple load.
1796 We know here that the field can be accessed with one single
1797 instruction. For targets that support unaligned memory,
1798 an unaligned access may be necessary. */
1799 if (bitsize == GET_MODE_BITSIZE (mode1))
1801 rtx result = adjust_bitfield_address (str_rtx, mode1,
1802 bitnum / BITS_PER_UNIT);
1803 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1804 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1807 str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1808 &bitnum);
1809 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1810 str_rtx = copy_to_reg (str_rtx);
1813 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1814 target, mode, tmode, true);
1817 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1818 from bit BITNUM of OP0.
1820 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1821 If TARGET is nonzero, attempts to store the value there
1822 and return TARGET, but this is not guaranteed.
1823 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1825 static rtx
1826 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1827 unsigned HOST_WIDE_INT bitsize,
1828 unsigned HOST_WIDE_INT bitnum, rtx target,
1829 int unsignedp)
1831 if (MEM_P (op0))
1833 machine_mode mode
1834 = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1835 MEM_VOLATILE_P (op0));
1837 if (mode == VOIDmode)
1838 /* The only way this should occur is if the field spans word
1839 boundaries. */
1840 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1842 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1845 return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1846 target, unsignedp);
1849 /* Helper function for extract_fixed_bit_field, extracts
1850 the bit field always using the MODE of OP0. */
1852 static rtx
1853 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1854 unsigned HOST_WIDE_INT bitsize,
1855 unsigned HOST_WIDE_INT bitnum, rtx target,
1856 int unsignedp)
1858 machine_mode mode = GET_MODE (op0);
1859 gcc_assert (SCALAR_INT_MODE_P (mode));
1861 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1862 for invalid input, such as extract equivalent of f5 from
1863 gcc.dg/pr48335-2.c. */
1865 if (BYTES_BIG_ENDIAN)
1866 /* BITNUM is the distance between our msb and that of OP0.
1867 Convert it to the distance from the lsb. */
1868 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1870 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1871 We have reduced the big-endian case to the little-endian case. */
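/* For example, with a 32-bit OP0, BITSIZE == 8 and a big-endian BITNUM of 8
   (the field occupies the second byte counting from the msb), the conversion
   gives 32 - 8 - 8 == 16: the field's lsb sits 16 bits above OP0's lsb.  */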
1873 if (unsignedp)
1875 if (bitnum)
1877 /* If the field does not already start at the lsb,
1878 shift it so it does. */
1879 /* Maybe propagate the target for the shift. */
1880 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1881 if (tmode != mode)
1882 subtarget = 0;
1883 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1885 /* Convert the value to the desired mode. */
1886 if (mode != tmode)
1887 op0 = convert_to_mode (tmode, op0, 1);
1889 /* Unless the msb of the field used to be the msb when we shifted,
1890 mask out the upper bits. */
1892 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1893 return expand_binop (GET_MODE (op0), and_optab, op0,
1894 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1895 target, 1, OPTAB_LIB_WIDEN);
1896 return op0;
1899 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1900 then arithmetic-shift its lsb to the lsb of the word. */
1901 op0 = force_reg (mode, op0);
1903 /* Find the narrowest integer mode that contains the field. */
1905 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1906 mode = GET_MODE_WIDER_MODE (mode))
1907 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1909 op0 = convert_to_mode (mode, op0, 0);
1910 break;
1913 if (mode != tmode)
1914 target = 0;
1916 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1918 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1919 /* Maybe propagate the target for the shift. */
1920 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1921 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1924 return expand_shift (RSHIFT_EXPR, mode, op0,
1925 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1928 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1929 VALUE << BITPOS. */
1931 static rtx
1932 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
1933 int bitpos)
1935 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
1938 /* Extract a bit field that is split across two words
1939 and return an RTX for the result.
1941 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1942 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1943 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
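/* Worked example, assuming 32-bit words on a little-endian target
   (!BYTES_BIG_ENDIAN): a 16-bit field at BITPOS 24 has its low 8 bits in
   bits 24..31 of word 0 and its high 8 bits in bits 0..7 of word 1.  The
   loop below extracts each part zero-extended, shifts the second part left
   by 8, and IORs the parts together; a final pair of shifts sign-extends
   the result when UNSIGNEDP is zero.  */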
1945 static rtx
1946 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1947 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1949 unsigned int unit;
1950 unsigned int bitsdone = 0;
1951 rtx result = NULL_RTX;
1952 int first = 1;
1954 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1955 much at a time. */
1956 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1957 unit = BITS_PER_WORD;
1958 else
1959 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1961 while (bitsdone < bitsize)
1963 unsigned HOST_WIDE_INT thissize;
1964 rtx part, word;
1965 unsigned HOST_WIDE_INT thispos;
1966 unsigned HOST_WIDE_INT offset;
1968 offset = (bitpos + bitsdone) / unit;
1969 thispos = (bitpos + bitsdone) % unit;
1971 /* THISSIZE must not overrun a word boundary. Otherwise,
1972 extract_fixed_bit_field will call us again, and we will mutually
1973 recurse forever. */
1974 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1975 thissize = MIN (thissize, unit - thispos);
1977 /* If OP0 is a register, then handle OFFSET here.
1979 When handling multiword bitfields, extract_bit_field may pass
1980 down a word_mode SUBREG of a larger REG for a bitfield that actually
1981 crosses a word boundary. Thus, for a SUBREG, we must find
1982 the current word starting from the base register. */
1983 if (GET_CODE (op0) == SUBREG)
1985 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1986 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1987 GET_MODE (SUBREG_REG (op0)));
1988 offset = 0;
1990 else if (REG_P (op0))
1992 word = operand_subword_force (op0, offset, GET_MODE (op0));
1993 offset = 0;
1995 else
1996 word = op0;
1998 /* Extract the parts in bit-counting order,
1999 whose meaning is determined by BYTES_PER_UNIT.
2000 OFFSET is in UNITs, and UNIT is in bits. */
2001 part = extract_fixed_bit_field (word_mode, word, thissize,
2002 offset * unit + thispos, 0, 1);
2003 bitsdone += thissize;
2005 /* Shift this part into place for the result. */
2006 if (BYTES_BIG_ENDIAN)
2008 if (bitsize != bitsdone)
2009 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2010 bitsize - bitsdone, 0, 1);
2012 else
2014 if (bitsdone != thissize)
2015 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2016 bitsdone - thissize, 0, 1);
2019 if (first)
2020 result = part;
2021 else
2022 /* Combine the parts with bitwise or. This works
2023 because we extracted each part as an unsigned bit field. */
2024 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2025 OPTAB_LIB_WIDEN);
2027 first = 0;
2030 /* Unsigned bit field: we are done. */
2031 if (unsignedp)
2032 return result;
2033 /* Signed bit field: sign-extend with two arithmetic shifts. */
2034 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2035 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2036 return expand_shift (RSHIFT_EXPR, word_mode, result,
2037 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2040 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2041 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2042 MODE, fill the upper bits with zeros. Fail if the layout of either
2043 mode is unknown (as for CC modes) or if the extraction would involve
2044 unprofitable mode punning. Return the value on success, otherwise
2045 return null.
2047 This is different from gen_lowpart* in these respects:
2049 - the returned value must always be considered an rvalue
2051 - when MODE is wider than SRC_MODE, the extraction involves
2052 a zero extension
2054 - when MODE is smaller than SRC_MODE, the extraction involves
2055 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2057 In other words, this routine performs a computation, whereas the
2058 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2059 operations. */
2061 rtx
2062 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2064 machine_mode int_mode, src_int_mode;
2066 if (mode == src_mode)
2067 return src;
2069 if (CONSTANT_P (src))
2071 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2072 fails, it will happily create (subreg (symbol_ref)) or similar
2073 invalid SUBREGs. */
2074 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2075 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2076 if (ret)
2077 return ret;
2079 if (GET_MODE (src) == VOIDmode
2080 || !validate_subreg (mode, src_mode, src, byte))
2081 return NULL_RTX;
2083 src = force_reg (GET_MODE (src), src);
2084 return gen_rtx_SUBREG (mode, src, byte);
2087 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2088 return NULL_RTX;
2090 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2091 && MODES_TIEABLE_P (mode, src_mode))
2093 rtx x = gen_lowpart_common (mode, src);
2094 if (x)
2095 return x;
2098 src_int_mode = int_mode_for_mode (src_mode);
2099 int_mode = int_mode_for_mode (mode);
2100 if (src_int_mode == BLKmode || int_mode == BLKmode)
2101 return NULL_RTX;
2103 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2104 return NULL_RTX;
2105 if (!MODES_TIEABLE_P (int_mode, mode))
2106 return NULL_RTX;
2108 src = gen_lowpart (src_int_mode, src);
2109 src = convert_modes (int_mode, src_int_mode, src, true);
2110 src = gen_lowpart (mode, src);
2111 return src;
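/* As a rough C-level analogy (a sketch assuming 32-bit SFmode and SImode),
   extract_low_bits (SImode, SFmode, x) behaves like

       float f = ...;
       uint32_t bits;
       memcpy (&bits, &f, sizeof bits);

   i.e. an rvalue reinterpretation of the low bits, except that it works on
   rtl and may return NULL_RTX instead of forcing an unprofitable punning.  */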
2114 /* Add INC into TARGET. */
2116 void
2117 expand_inc (rtx target, rtx inc)
2119 rtx value = expand_binop (GET_MODE (target), add_optab,
2120 target, inc,
2121 target, 0, OPTAB_LIB_WIDEN);
2122 if (value != target)
2123 emit_move_insn (target, value);
2126 /* Subtract DEC from TARGET. */
2128 void
2129 expand_dec (rtx target, rtx dec)
2131 rtx value = expand_binop (GET_MODE (target), sub_optab,
2132 target, dec,
2133 target, 0, OPTAB_LIB_WIDEN);
2134 if (value != target)
2135 emit_move_insn (target, value);
2138 /* Output a shift instruction for expression code CODE,
2139 with SHIFTED being the rtx for the value to shift,
2140 and AMOUNT the rtx for the amount to shift by.
2141 Store the result in the rtx TARGET, if that is convenient.
2142 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2143 Return the rtx for where the value is. */
2145 static rtx
2146 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2147 rtx amount, rtx target, int unsignedp)
2149 rtx op1, temp = 0;
2150 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2151 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2152 optab lshift_optab = ashl_optab;
2153 optab rshift_arith_optab = ashr_optab;
2154 optab rshift_uns_optab = lshr_optab;
2155 optab lrotate_optab = rotl_optab;
2156 optab rrotate_optab = rotr_optab;
2157 machine_mode op1_mode;
2158 machine_mode scalar_mode = mode;
2159 int attempt;
2160 bool speed = optimize_insn_for_speed_p ();
2162 if (VECTOR_MODE_P (mode))
2163 scalar_mode = GET_MODE_INNER (mode);
2164 op1 = amount;
2165 op1_mode = GET_MODE (op1);
2167 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2168 shift amount is a vector, use the vector/vector shift patterns. */
2169 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2171 lshift_optab = vashl_optab;
2172 rshift_arith_optab = vashr_optab;
2173 rshift_uns_optab = vlshr_optab;
2174 lrotate_optab = vrotl_optab;
2175 rrotate_optab = vrotr_optab;
2178 /* Previously detected shift-counts computed by NEGATE_EXPR
2179 and shifted in the other direction; but that does not work
2180 on all machines. */
2182 if (SHIFT_COUNT_TRUNCATED)
2184 if (CONST_INT_P (op1)
2185 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2186 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2187 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2188 % GET_MODE_BITSIZE (scalar_mode));
2189 else if (GET_CODE (op1) == SUBREG
2190 && subreg_lowpart_p (op1)
2191 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2192 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2193 op1 = SUBREG_REG (op1);
2196 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2197 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2198 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2199 amount instead. */
2200 if (rotate
2201 && CONST_INT_P (op1)
2202 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2203 GET_MODE_BITSIZE (scalar_mode) - 1))
2205 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2206 left = !left;
2207 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2210 /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2211 Note that this is not the case for bigger values. For instance a rotation
2212 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2213 0x04030201 (bswapsi). */
2214 if (rotate
2215 && CONST_INT_P (op1)
2216 && INTVAL (op1) == BITS_PER_UNIT
2217 && GET_MODE_SIZE (scalar_mode) == 2
2218 && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2219 return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2220 unsignedp);
2222 if (op1 == const0_rtx)
2223 return shifted;
2225 /* Check whether it's cheaper to implement a left shift by a constant
2226 bit count as a sequence of additions. */
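/* For instance, x << 2 can be computed as two self-additions, t = x + x
   followed by t = t + t; this path is taken when two adds are cheaper on
   the target than a single shift by 2.  */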
2227 if (code == LSHIFT_EXPR
2228 && CONST_INT_P (op1)
2229 && INTVAL (op1) > 0
2230 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2231 && INTVAL (op1) < MAX_BITS_PER_WORD
2232 && (shift_cost (speed, mode, INTVAL (op1))
2233 > INTVAL (op1) * add_cost (speed, mode))
2234 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2236 int i;
2237 for (i = 0; i < INTVAL (op1); i++)
2239 temp = force_reg (mode, shifted);
2240 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2241 unsignedp, OPTAB_LIB_WIDEN);
2243 return shifted;
2246 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2248 enum optab_methods methods;
2250 if (attempt == 0)
2251 methods = OPTAB_DIRECT;
2252 else if (attempt == 1)
2253 methods = OPTAB_WIDEN;
2254 else
2255 methods = OPTAB_LIB_WIDEN;
2257 if (rotate)
2259 /* Widening does not work for rotation. */
2260 if (methods == OPTAB_WIDEN)
2261 continue;
2262 else if (methods == OPTAB_LIB_WIDEN)
2264 /* If we have been unable to open-code this by a rotation,
2265 do it as the IOR of two shifts. I.e., to rotate A
2266 by N bits, compute
2267 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2268 where C is the bitsize of A.
2270 It is theoretically possible that the target machine might
2271 not be able to perform either shift and hence we would
2272 be making two libcalls rather than just the one for the
2273 shift (similarly if IOR could not be done). We will allow
2274 this extremely unlikely lossage to avoid complicating the
2275 code below. */
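/* Concretely, for a 32-bit A rotated left by N this emits
   (A << N) | ((unsigned) A >> ((-N) & 31)); e.g. A = 0x01020304, N = 8
   gives 0x02030400 | 0x00000001 = 0x02030401.  Masking -N with C - 1 keeps
   the right-shift count within [0, C - 1] even for N == 0, avoiding an
   undefined shift by C.  */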
2277 rtx subtarget = target == shifted ? 0 : target;
2278 rtx new_amount, other_amount;
2279 rtx temp1;
2281 new_amount = op1;
2282 if (op1 == const0_rtx)
2283 return shifted;
2284 else if (CONST_INT_P (op1))
2285 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2286 - INTVAL (op1));
2287 else
2289 other_amount
2290 = simplify_gen_unary (NEG, GET_MODE (op1),
2291 op1, GET_MODE (op1));
2292 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2293 other_amount
2294 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2295 gen_int_mode (mask, GET_MODE (op1)));
2298 shifted = force_reg (mode, shifted);
2300 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2301 mode, shifted, new_amount, 0, 1);
2302 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2303 mode, shifted, other_amount,
2304 subtarget, 1);
2305 return expand_binop (mode, ior_optab, temp, temp1, target,
2306 unsignedp, methods);
2309 temp = expand_binop (mode,
2310 left ? lrotate_optab : rrotate_optab,
2311 shifted, op1, target, unsignedp, methods);
2313 else if (unsignedp)
2314 temp = expand_binop (mode,
2315 left ? lshift_optab : rshift_uns_optab,
2316 shifted, op1, target, unsignedp, methods);
2318 /* Do arithmetic shifts.
2319 Also, if we are going to widen the operand, we can just as well
2320 use an arithmetic right-shift instead of a logical one. */
2321 if (temp == 0 && ! rotate
2322 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2324 enum optab_methods methods1 = methods;
2326 /* If trying to widen a log shift to an arithmetic shift,
2327 don't accept an arithmetic shift of the same size. */
2328 if (unsignedp)
2329 methods1 = OPTAB_MUST_WIDEN;
2331 /* Arithmetic shift */
2333 temp = expand_binop (mode,
2334 left ? lshift_optab : rshift_arith_optab,
2335 shifted, op1, target, unsignedp, methods1);
2338 /* We used to try extzv here for logical right shifts, but that was
2339 only useful for one machine, the VAX, and caused poor code
2340 generation there for lshrdi3, so the code was deleted and a
2341 define_expand for lshrsi3 was added to vax.md. */
2344 gcc_assert (temp);
2345 return temp;
2348 /* Output a shift instruction for expression code CODE,
2349 with SHIFTED being the rtx for the value to shift,
2350 and AMOUNT the amount to shift by.
2351 Store the result in the rtx TARGET, if that is convenient.
2352 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2353 Return the rtx for where the value is. */
2355 rtx
2356 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2357 int amount, rtx target, int unsignedp)
2359 return expand_shift_1 (code, mode,
2360 shifted, GEN_INT (amount), target, unsignedp);
2363 /* Output a shift instruction for expression code CODE,
2364 with SHIFTED being the rtx for the value to shift,
2365 and AMOUNT the tree for the amount to shift by.
2366 Store the result in the rtx TARGET, if that is convenient.
2367 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2368 Return the rtx for where the value is. */
2370 rtx
2371 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2372 tree amount, rtx target, int unsignedp)
2374 return expand_shift_1 (code, mode,
2375 shifted, expand_normal (amount), target, unsignedp);
2379 /* Indicates the type of fixup needed after a constant multiplication.
2380 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2381 the result should be negated, and ADD_VARIANT means that the
2382 multiplicand should be added to the result. */
2383 enum mult_variant {basic_variant, negate_variant, add_variant};
2385 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2386 const struct mult_cost *, machine_mode mode);
2387 static bool choose_mult_variant (machine_mode, HOST_WIDE_INT,
2388 struct algorithm *, enum mult_variant *, int);
2389 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2390 const struct algorithm *, enum mult_variant);
2391 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2392 static rtx extract_high_half (machine_mode, rtx);
2393 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2394 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2395 int, int);
2396 /* Compute and return the best algorithm for multiplying by T.
2397 The algorithm must cost less than COST_LIMIT.
2398 If retval.cost >= COST_LIMIT, no algorithm was found and all
2399 other fields of the returned struct are undefined.
2400 MODE is the machine mode of the multiplication. */
2402 static void
2403 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2404 const struct mult_cost *cost_limit, machine_mode mode)
2406 int m;
2407 struct algorithm *alg_in, *best_alg;
2408 struct mult_cost best_cost;
2409 struct mult_cost new_limit;
2410 int op_cost, op_latency;
2411 unsigned HOST_WIDE_INT orig_t = t;
2412 unsigned HOST_WIDE_INT q;
2413 int maxm, hash_index;
2414 bool cache_hit = false;
2415 enum alg_code cache_alg = alg_zero;
2416 bool speed = optimize_insn_for_speed_p ();
2417 machine_mode imode;
2418 struct alg_hash_entry *entry_ptr;
2420 /* Indicate that no algorithm is yet found. If no algorithm
2421 is found, this value will be returned and indicate failure. */
2422 alg_out->cost.cost = cost_limit->cost + 1;
2423 alg_out->cost.latency = cost_limit->latency + 1;
2425 if (cost_limit->cost < 0
2426 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2427 return;
2429 /* Be prepared for vector modes. */
2430 imode = GET_MODE_INNER (mode);
2431 if (imode == VOIDmode)
2432 imode = mode;
2434 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2436 /* Restrict the bits of "t" to the multiplication's mode. */
2437 t &= GET_MODE_MASK (imode);
2439 /* t == 1 can be done in zero cost. */
2440 if (t == 1)
2442 alg_out->ops = 1;
2443 alg_out->cost.cost = 0;
2444 alg_out->cost.latency = 0;
2445 alg_out->op[0] = alg_m;
2446 return;
2449 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2450 fail now. */
2451 if (t == 0)
2453 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2454 return;
2455 else
2457 alg_out->ops = 1;
2458 alg_out->cost.cost = zero_cost (speed);
2459 alg_out->cost.latency = zero_cost (speed);
2460 alg_out->op[0] = alg_zero;
2461 return;
2465 /* We'll be needing a couple extra algorithm structures now. */
2467 alg_in = XALLOCA (struct algorithm);
2468 best_alg = XALLOCA (struct algorithm);
2469 best_cost = *cost_limit;
2471 /* Compute the hash index. */
2472 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2474 /* See if we already know what to do for T. */
2475 entry_ptr = alg_hash_entry_ptr (hash_index);
2476 if (entry_ptr->t == t
2477 && entry_ptr->mode == mode
2479 && entry_ptr->speed == speed
2480 && entry_ptr->alg != alg_unknown)
2482 cache_alg = entry_ptr->alg;
2484 if (cache_alg == alg_impossible)
2486 /* The cache tells us that it's impossible to synthesize
2487 multiplication by T within entry_ptr->cost. */
2488 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2489 /* COST_LIMIT is at least as restrictive as the one
2490 recorded in the hash table, in which case we have no
2491 hope of synthesizing a multiplication. Just
2492 return. */
2493 return;
2495 /* If we get here, COST_LIMIT is less restrictive than the
2496 one recorded in the hash table, so we may be able to
2497 synthesize a multiplication. Proceed as if we didn't
2498 have the cache entry. */
2500 else
2502 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2503 /* The cached algorithm shows that this multiplication
2504 requires more cost than COST_LIMIT. Just return. This
2505 way, we don't clobber this cache entry with
2506 alg_impossible but retain useful information. */
2507 return;
2509 cache_hit = true;
2511 switch (cache_alg)
2513 case alg_shift:
2514 goto do_alg_shift;
2516 case alg_add_t_m2:
2517 case alg_sub_t_m2:
2518 goto do_alg_addsub_t_m2;
2520 case alg_add_factor:
2521 case alg_sub_factor:
2522 goto do_alg_addsub_factor;
2524 case alg_add_t2_m:
2525 goto do_alg_add_t2_m;
2527 case alg_sub_t2_m:
2528 goto do_alg_sub_t2_m;
2530 default:
2531 gcc_unreachable ();
2536 /* If we have a group of zero bits at the low-order part of T, try
2537 multiplying by the remaining bits and then doing a shift. */
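/* E.g. T == 40 == 5 << 3: synthesize x * 5 as (x << 2) + x and then shift
   the result left by 3, giving x * 40 with one shift-add plus one shift.  */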
2539 if ((t & 1) == 0)
2541 do_alg_shift:
2542 m = floor_log2 (t & -t); /* m = number of low zero bits */
2543 if (m < maxm)
2545 q = t >> m;
2546 /* The function expand_shift will choose between a shift and
2547 a sequence of additions, so the observed cost is given as
2548 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2549 op_cost = m * add_cost (speed, mode);
2550 if (shift_cost (speed, mode, m) < op_cost)
2551 op_cost = shift_cost (speed, mode, m);
2552 new_limit.cost = best_cost.cost - op_cost;
2553 new_limit.latency = best_cost.latency - op_cost;
2554 synth_mult (alg_in, q, &new_limit, mode);
2556 alg_in->cost.cost += op_cost;
2557 alg_in->cost.latency += op_cost;
2558 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2560 best_cost = alg_in->cost;
2561 std::swap (alg_in, best_alg);
2562 best_alg->log[best_alg->ops] = m;
2563 best_alg->op[best_alg->ops] = alg_shift;
2566 /* See if treating ORIG_T as a signed number yields a better
2567 sequence. Try this sequence only for a negative ORIG_T
2568 as it would be useless for a non-negative ORIG_T. */
2569 if ((HOST_WIDE_INT) orig_t < 0)
2571 /* Shift ORIG_T as follows because a right shift of a
2572 negative-valued signed type is implementation
2573 defined. */
2574 q = ~(~orig_t >> m);
2575 /* The function expand_shift will choose between a shift
2576 and a sequence of additions, so the observed cost is
2577 given as MIN (m * add_cost(speed, mode),
2578 shift_cost(speed, mode, m)). */
2579 op_cost = m * add_cost (speed, mode);
2580 if (shift_cost (speed, mode, m) < op_cost)
2581 op_cost = shift_cost (speed, mode, m);
2582 new_limit.cost = best_cost.cost - op_cost;
2583 new_limit.latency = best_cost.latency - op_cost;
2584 synth_mult (alg_in, q, &new_limit, mode);
2586 alg_in->cost.cost += op_cost;
2587 alg_in->cost.latency += op_cost;
2588 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2590 best_cost = alg_in->cost;
2591 std::swap (alg_in, best_alg);
2592 best_alg->log[best_alg->ops] = m;
2593 best_alg->op[best_alg->ops] = alg_shift;
2597 if (cache_hit)
2598 goto done;
2601 /* If we have an odd number, add or subtract one. */
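/* E.g. T == 7 ends in ...111, so compute x * 8 - x == (x << 3) - x, while
   T == 5 ends in ...01, so compute x * 4 + x == (x << 2) + x.  */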
2602 if ((t & 1) != 0)
2604 unsigned HOST_WIDE_INT w;
2606 do_alg_addsub_t_m2:
2607 for (w = 1; (w & t) != 0; w <<= 1)
2608 ;
2609 /* If T was -1, then W will be zero after the loop. This is another
2610 case where T ends with ...111. Handling it as (T + 1) followed by
2611 a subtraction produces slightly better code and makes algorithm
2612 selection much faster than treating it like the ...0111 case
2613 below. */
2614 if (w == 0
2615 || (w > 2
2616 /* Reject the case where t is 3.
2617 Thus we prefer addition in that case. */
2618 && t != 3))
2620 /* T ends with ...111. Multiply by (T + 1) and subtract the operand: x*T = x*(T + 1) - x. */
2622 op_cost = add_cost (speed, mode);
2623 new_limit.cost = best_cost.cost - op_cost;
2624 new_limit.latency = best_cost.latency - op_cost;
2625 synth_mult (alg_in, t + 1, &new_limit, mode);
2627 alg_in->cost.cost += op_cost;
2628 alg_in->cost.latency += op_cost;
2629 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2631 best_cost = alg_in->cost;
2632 std::swap (alg_in, best_alg);
2633 best_alg->log[best_alg->ops] = 0;
2634 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2637 else
2639 /* T ends with ...01 or ...011. Multiply by (T - 1) and add the operand: x*T = x*(T - 1) + x. */
2641 op_cost = add_cost (speed, mode);
2642 new_limit.cost = best_cost.cost - op_cost;
2643 new_limit.latency = best_cost.latency - op_cost;
2644 synth_mult (alg_in, t - 1, &new_limit, mode);
2646 alg_in->cost.cost += op_cost;
2647 alg_in->cost.latency += op_cost;
2648 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2650 best_cost = alg_in->cost;
2651 std::swap (alg_in, best_alg);
2652 best_alg->log[best_alg->ops] = 0;
2653 best_alg->op[best_alg->ops] = alg_add_t_m2;
2657 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2658 quickly with a - a * n for some appropriate constant n. */
2659 m = exact_log2 (-orig_t + 1);
2660 if (m >= 0 && m < maxm)
2662 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2663 /* If the target has a cheap shift-and-subtract insn use
2664 that in preference to a shift insn followed by a sub insn.
2665 Assume that the shift-and-sub is "atomic" with a latency
2666 equal to its cost; otherwise assume that on superscalar
2667 hardware the shift may be executed concurrently with the
2668 earlier steps in the algorithm. */
2669 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2671 op_cost = shiftsub1_cost (speed, mode, m);
2672 op_latency = op_cost;
2674 else
2675 op_latency = add_cost (speed, mode);
2677 new_limit.cost = best_cost.cost - op_cost;
2678 new_limit.latency = best_cost.latency - op_latency;
2679 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2680 &new_limit, mode);
2682 alg_in->cost.cost += op_cost;
2683 alg_in->cost.latency += op_latency;
2684 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2686 best_cost = alg_in->cost;
2687 std::swap (alg_in, best_alg);
2688 best_alg->log[best_alg->ops] = m;
2689 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2693 if (cache_hit)
2694 goto done;
2697 /* Look for factors of t of the form
2698 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2699 If we find such a factor, we can multiply by t using an algorithm that
2700 multiplies by q, shift the result by m and add/subtract it to itself.
2702 We search for large factors first and loop down, even if large factors
2703 are less probable than small; if we find a large factor we will find a
2704 good sequence quickly, and therefore be able to prune (by decreasing
2705 COST_LIMIT) the search. */
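/* E.g. T == 45 has the factor 2**4 - 1 == 15: synthesize y = x * 3
   == (x << 1) + x, then x * 45 == (y << 4) - y.  */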
2707 do_alg_addsub_factor:
2708 for (m = floor_log2 (t - 1); m >= 2; m--)
2710 unsigned HOST_WIDE_INT d;
2712 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2713 if (t % d == 0 && t > d && m < maxm
2714 && (!cache_hit || cache_alg == alg_add_factor))
2716 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2717 if (shiftadd_cost (speed, mode, m) <= op_cost)
2718 op_cost = shiftadd_cost (speed, mode, m);
2720 op_latency = op_cost;
2723 new_limit.cost = best_cost.cost - op_cost;
2724 new_limit.latency = best_cost.latency - op_latency;
2725 synth_mult (alg_in, t / d, &new_limit, mode);
2727 alg_in->cost.cost += op_cost;
2728 alg_in->cost.latency += op_latency;
2729 if (alg_in->cost.latency < op_cost)
2730 alg_in->cost.latency = op_cost;
2731 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2733 best_cost = alg_in->cost;
2734 std::swap (alg_in, best_alg);
2735 best_alg->log[best_alg->ops] = m;
2736 best_alg->op[best_alg->ops] = alg_add_factor;
2738 /* Other factors will have been taken care of in the recursion. */
2739 break;
2742 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2743 if (t % d == 0 && t > d && m < maxm
2744 && (!cache_hit || cache_alg == alg_sub_factor))
2746 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2747 if (shiftsub0_cost (speed, mode, m) <= op_cost)
2748 op_cost = shiftsub0_cost (speed, mode, m);
2750 op_latency = op_cost;
2752 new_limit.cost = best_cost.cost - op_cost;
2753 new_limit.latency = best_cost.latency - op_latency;
2754 synth_mult (alg_in, t / d, &new_limit, mode);
2756 alg_in->cost.cost += op_cost;
2757 alg_in->cost.latency += op_latency;
2758 if (alg_in->cost.latency < op_cost)
2759 alg_in->cost.latency = op_cost;
2760 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2762 best_cost = alg_in->cost;
2763 std::swap (alg_in, best_alg);
2764 best_alg->log[best_alg->ops] = m;
2765 best_alg->op[best_alg->ops] = alg_sub_factor;
2767 break;
2770 if (cache_hit)
2771 goto done;
2773 /* Try shift-and-add (load effective address) instructions,
2774 i.e. do a*3, a*5, a*9. */
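/* These correspond to the scaled-index forms of x86 "lea" (base + index *
   {2,4,8}): e.g. T == 9 gives q == 8, m == 3, and x * 9 == (x << 3) + x
   in a single shift-and-add.  */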
2775 if ((t & 1) != 0)
2777 do_alg_add_t2_m:
2778 q = t - 1;
2779 q = q & -q;
2780 m = exact_log2 (q);
2781 if (m >= 0 && m < maxm)
2783 op_cost = shiftadd_cost (speed, mode, m);
2784 new_limit.cost = best_cost.cost - op_cost;
2785 new_limit.latency = best_cost.latency - op_cost;
2786 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2788 alg_in->cost.cost += op_cost;
2789 alg_in->cost.latency += op_cost;
2790 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2792 best_cost = alg_in->cost;
2793 std::swap (alg_in, best_alg);
2794 best_alg->log[best_alg->ops] = m;
2795 best_alg->op[best_alg->ops] = alg_add_t2_m;
2798 if (cache_hit)
2799 goto done;
2801 do_alg_sub_t2_m:
2802 q = t + 1;
2803 q = q & -q;
2804 m = exact_log2 (q);
2805 if (m >= 0 && m < maxm)
2807 op_cost = shiftsub0_cost (speed, mode, m);
2808 new_limit.cost = best_cost.cost - op_cost;
2809 new_limit.latency = best_cost.latency - op_cost;
2810 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2812 alg_in->cost.cost += op_cost;
2813 alg_in->cost.latency += op_cost;
2814 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2816 best_cost = alg_in->cost;
2817 std::swap (alg_in, best_alg);
2818 best_alg->log[best_alg->ops] = m;
2819 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2822 if (cache_hit)
2823 goto done;
2826 done:
2827 /* If best_cost has not decreased, we have not found any algorithm. */
2828 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2830 /* We failed to find an algorithm. Record alg_impossible for
2831 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2832 we are asked to find an algorithm for T within the same or
2833 lower COST_LIMIT, we can immediately return to the
2834 caller. */
2835 entry_ptr->t = t;
2836 entry_ptr->mode = mode;
2837 entry_ptr->speed = speed;
2838 entry_ptr->alg = alg_impossible;
2839 entry_ptr->cost = *cost_limit;
2840 return;
2843 /* Cache the result. */
2844 if (!cache_hit)
2846 entry_ptr->t = t;
2847 entry_ptr->mode = mode;
2848 entry_ptr->speed = speed;
2849 entry_ptr->alg = best_alg->op[best_alg->ops];
2850 entry_ptr->cost.cost = best_cost.cost;
2851 entry_ptr->cost.latency = best_cost.latency;
2854 /* If the sequence is too long for `struct algorithm'
2855 to record, make this search fail. */
2856 if (best_alg->ops == MAX_BITS_PER_WORD)
2857 return;
2859 /* Copy the algorithm from temporary space to the space at alg_out.
2860 We avoid using structure assignment because the majority of
2861 best_alg is normally undefined, and this is a critical function. */
2862 alg_out->ops = best_alg->ops + 1;
2863 alg_out->cost = best_cost;
2864 memcpy (alg_out->op, best_alg->op,
2865 alg_out->ops * sizeof *alg_out->op);
2866 memcpy (alg_out->log, best_alg->log,
2867 alg_out->ops * sizeof *alg_out->log);
2870 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2871 Try three variations:
2873 - a shift/add sequence based on VAL itself
2874 - a shift/add sequence based on -VAL, followed by a negation
2875 - a shift/add sequence based on VAL - 1, followed by an addition.
2877 Return true if the cheapest of these cost less than MULT_COST,
2878 describing the algorithm in *ALG and final fixup in *VARIANT. */
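/* For instance, with VAL == -3 the negate variant is typically cheapest:
   synthesize x * 3 == (x << 1) + x and negate the result, rather than
   building -3 directly out of shifts and subtractions.  Which variant wins
   depends on the target's add, shift and negate costs.  */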
2880 static bool
2881 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2882 struct algorithm *alg, enum mult_variant *variant,
2883 int mult_cost)
2885 struct algorithm alg2;
2886 struct mult_cost limit;
2887 int op_cost;
2888 bool speed = optimize_insn_for_speed_p ();
2890 /* Fail quickly for impossible bounds. */
2891 if (mult_cost < 0)
2892 return false;
2894 /* Ensure that mult_cost provides a reasonable upper bound.
2895 Any constant multiplication can be performed with less
2896 than 2 * bits additions. */
2897 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2898 if (mult_cost > op_cost)
2899 mult_cost = op_cost;
2901 *variant = basic_variant;
2902 limit.cost = mult_cost;
2903 limit.latency = mult_cost;
2904 synth_mult (alg, val, &limit, mode);
2906 /* This works only if the inverted value actually fits in an
2907 `unsigned int'. */
2908 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2910 op_cost = neg_cost (speed, mode);
2911 if (MULT_COST_LESS (&alg->cost, mult_cost))
2913 limit.cost = alg->cost.cost - op_cost;
2914 limit.latency = alg->cost.latency - op_cost;
2916 else
2918 limit.cost = mult_cost - op_cost;
2919 limit.latency = mult_cost - op_cost;
2922 synth_mult (&alg2, -val, &limit, mode);
2923 alg2.cost.cost += op_cost;
2924 alg2.cost.latency += op_cost;
2925 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2926 *alg = alg2, *variant = negate_variant;
2929 /* This proves very useful for division-by-constant. */
2930 op_cost = add_cost (speed, mode);
2931 if (MULT_COST_LESS (&alg->cost, mult_cost))
2933 limit.cost = alg->cost.cost - op_cost;
2934 limit.latency = alg->cost.latency - op_cost;
2936 else
2938 limit.cost = mult_cost - op_cost;
2939 limit.latency = mult_cost - op_cost;
2942 synth_mult (&alg2, val - 1, &limit, mode);
2943 alg2.cost.cost += op_cost;
2944 alg2.cost.latency += op_cost;
2945 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2946 *alg = alg2, *variant = add_variant;
2948 return MULT_COST_LESS (&alg->cost, mult_cost);
2951 /* A subroutine of expand_mult, used for constant multiplications.
2952 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2953 convenient. Use the shift/add sequence described by ALG and apply
2954 the final fixup specified by VARIANT. */
2956 static rtx
2957 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
2958 rtx target, const struct algorithm *alg,
2959 enum mult_variant variant)
2961 HOST_WIDE_INT val_so_far;
2962 rtx_insn *insn;
2963 rtx accum, tem;
2964 int opno;
2965 machine_mode nmode;
2967 /* Avoid referencing memory over and over and invalid sharing
2968 on SUBREGs. */
2969 op0 = force_reg (mode, op0);
2971 /* ACCUM starts out either as OP0 or as a zero, depending on
2972 the first operation. */
2974 if (alg->op[0] == alg_zero)
2976 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2977 val_so_far = 0;
2979 else if (alg->op[0] == alg_m)
2981 accum = copy_to_mode_reg (mode, op0);
2982 val_so_far = 1;
2984 else
2985 gcc_unreachable ();
2987 for (opno = 1; opno < alg->ops; opno++)
2989 int log = alg->log[opno];
2990 rtx shift_subtarget = optimize ? 0 : accum;
2991 rtx add_target
2992 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2993 && !optimize)
2994 ? target : 0;
2995 rtx accum_target = optimize ? 0 : accum;
2996 rtx accum_inner;
2998 switch (alg->op[opno])
3000 case alg_shift:
3001 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3002 /* REG_EQUAL note will be attached to the following insn. */
3003 emit_move_insn (accum, tem);
3004 val_so_far <<= log;
3005 break;
3007 case alg_add_t_m2:
3008 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3009 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3010 add_target ? add_target : accum_target);
3011 val_so_far += (HOST_WIDE_INT) 1 << log;
3012 break;
3014 case alg_sub_t_m2:
3015 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3016 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3017 add_target ? add_target : accum_target);
3018 val_so_far -= (HOST_WIDE_INT) 1 << log;
3019 break;
3021 case alg_add_t2_m:
3022 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3023 log, shift_subtarget, 0);
3024 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3025 add_target ? add_target : accum_target);
3026 val_so_far = (val_so_far << log) + 1;
3027 break;
3029 case alg_sub_t2_m:
3030 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3031 log, shift_subtarget, 0);
3032 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3033 add_target ? add_target : accum_target);
3034 val_so_far = (val_so_far << log) - 1;
3035 break;
3037 case alg_add_factor:
3038 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3039 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3040 add_target ? add_target : accum_target);
3041 val_so_far += val_so_far << log;
3042 break;
3044 case alg_sub_factor:
3045 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3046 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3047 (add_target
3048 ? add_target : (optimize ? 0 : tem)));
3049 val_so_far = (val_so_far << log) - val_so_far;
3050 break;
3052 default:
3053 gcc_unreachable ();
3056 if (SCALAR_INT_MODE_P (mode))
3058 /* Write a REG_EQUAL note on the last insn so that we can cse
3059 multiplication sequences. Note that if ACCUM is a SUBREG,
3060 we've set the inner register and must properly indicate that. */
3061 tem = op0, nmode = mode;
3062 accum_inner = accum;
3063 if (GET_CODE (accum) == SUBREG)
3065 accum_inner = SUBREG_REG (accum);
3066 nmode = GET_MODE (accum_inner);
3067 tem = gen_lowpart (nmode, op0);
3070 insn = get_last_insn ();
3071 set_dst_reg_note (insn, REG_EQUAL,
3072 gen_rtx_MULT (nmode, tem,
3073 gen_int_mode (val_so_far, nmode)),
3074 accum_inner);
3078 if (variant == negate_variant)
3080 val_so_far = -val_so_far;
3081 accum = expand_unop (mode, neg_optab, accum, target, 0);
3083 else if (variant == add_variant)
3085 val_so_far = val_so_far + 1;
3086 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3089 /* Compare only the bits of val and val_so_far that are significant
3090 in the result mode, to avoid sign-/zero-extension confusion. */
3091 nmode = GET_MODE_INNER (mode);
3092 if (nmode == VOIDmode)
3093 nmode = mode;
3094 val &= GET_MODE_MASK (nmode);
3095 val_so_far &= GET_MODE_MASK (nmode);
3096 gcc_assert (val == val_so_far);
3098 return accum;
3101 /* Perform a multiplication and return an rtx for the result.
3102 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3103 TARGET is a suggestion for where to store the result (an rtx).
3105 We check specially for a constant integer as OP1.
3106 If you want this check for OP0 as well, then before calling
3107 you should swap the two operands if OP0 would be constant. */
3109 rtx
3110 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3111 int unsignedp)
3113 enum mult_variant variant;
3114 struct algorithm algorithm;
3115 rtx scalar_op1;
3116 int max_cost;
3117 bool speed = optimize_insn_for_speed_p ();
3118 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3120 if (CONSTANT_P (op0))
3121 std::swap (op0, op1);
3123 /* For vectors, there are several simplifications that can be made if
3124 all elements of the vector constant are identical. */
3125 scalar_op1 = op1;
3126 if (GET_CODE (op1) == CONST_VECTOR)
3128 int i, n = CONST_VECTOR_NUNITS (op1);
3129 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3130 for (i = 1; i < n; ++i)
3131 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3132 goto skip_scalar;
3135 if (INTEGRAL_MODE_P (mode))
3137 rtx fake_reg;
3138 HOST_WIDE_INT coeff;
3139 bool is_neg;
3140 int mode_bitsize;
3142 if (op1 == CONST0_RTX (mode))
3143 return op1;
3144 if (op1 == CONST1_RTX (mode))
3145 return op0;
3146 if (op1 == CONSTM1_RTX (mode))
3147 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3148 op0, target, 0);
3150 if (do_trapv)
3151 goto skip_synth;
3153 /* If mode is an integer vector mode, check whether the backend supports
3154 vector lshift (by scalar or vector) at all. If not, we can't use
3155 a synthesized multiply. */
3156 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3157 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3158 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3159 goto skip_synth;
3161 /* These are the operations that are potentially turned into
3162 a sequence of shifts and additions. */
3163 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3165 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3166 less than or equal in size to `unsigned int' this doesn't matter.
3167 If the mode is larger than `unsigned int', then synth_mult works
3168 only if the constant value exactly fits in an `unsigned int' without
3169 any truncation. This means that multiplying by negative values does
3170 not work; results are off by 2^32 on a 32 bit machine. */
3171 if (CONST_INT_P (scalar_op1))
3173 coeff = INTVAL (scalar_op1);
3174 is_neg = coeff < 0;
3176 #if TARGET_SUPPORTS_WIDE_INT
3177 else if (CONST_WIDE_INT_P (scalar_op1))
3178 #else
3179 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3180 #endif
3182 int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3183 /* Perfect power of 2 (other than 1, which is handled above). */
3184 if (shift > 0)
3185 return expand_shift (LSHIFT_EXPR, mode, op0,
3186 shift, target, unsignedp);
3187 else
3188 goto skip_synth;
3190 else
3191 goto skip_synth;
3193 /* We used to test optimize here, on the grounds that it's better to
3194 produce a smaller program when -O is not used. But this causes
3195 such a terrible slowdown sometimes that it seems better to always
3196 use synth_mult. */
3198 /* Special case powers of two. */
3199 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3200 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3201 return expand_shift (LSHIFT_EXPR, mode, op0,
3202 floor_log2 (coeff), target, unsignedp);
3204 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3206 /* Attempt to handle multiplication of DImode values by negative
3207 coefficients, by performing the multiplication by a positive
3208 multiplier and then inverting the result. */
3209 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3211 /* It's safe to use -coeff even for INT_MIN, as the
3212 result is interpreted as an unsigned coefficient.
3213 Exclude cost of op0 from max_cost to match the cost
3214 calculation of the synth_mult. */
3215 coeff = -(unsigned HOST_WIDE_INT) coeff;
3216 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3217 - neg_cost (speed, mode));
3218 if (max_cost <= 0)
3219 goto skip_synth;
3221 /* Special case powers of two. */
3222 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3224 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3225 floor_log2 (coeff), target, unsignedp);
3226 return expand_unop (mode, neg_optab, temp, target, 0);
3229 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3230 max_cost))
3232 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3233 &algorithm, variant);
3234 return expand_unop (mode, neg_optab, temp, target, 0);
3236 goto skip_synth;
3239 /* Exclude cost of op0 from max_cost to match the cost
3240 calculation of the synth_mult. */
3241 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3242 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3243 return expand_mult_const (mode, op0, coeff, target,
3244 &algorithm, variant);
3246 skip_synth:
3248 /* Expand x*2.0 as x+x. */
3249 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3251 REAL_VALUE_TYPE d;
3252 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3254 if (REAL_VALUES_EQUAL (d, dconst2))
3256 op0 = force_reg (GET_MODE (op0), op0);
3257 return expand_binop (mode, add_optab, op0, op0,
3258 target, unsignedp, OPTAB_LIB_WIDEN);
3261 skip_scalar:
3263 /* This used to use umul_optab if unsigned, but for non-widening multiply
3264 there is no difference between signed and unsigned. */
3265 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3266 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3267 gcc_assert (op0);
3268 return op0;
3271 /* Return a cost estimate for multiplying a register by the given
3272 COEFFicient in the given MODE and SPEED. */
3274 int
3275 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3277 int max_cost;
3278 struct algorithm algorithm;
3279 enum mult_variant variant;
3281 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3282 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3283 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3284 return algorithm.cost.cost;
3285 else
3286 return max_cost;
3289 /* Perform a widening multiplication and return an rtx for the result.
3290 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3291 TARGET is a suggestion for where to store the result (an rtx).
3292 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3293 or smul_widen_optab.
3295 We check specially for a constant integer as OP1, comparing the
3296 cost of a widening multiply against the cost of a sequence of shifts
3297 and adds. */
3299 rtx
3300 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3301 int unsignedp, optab this_optab)
3303 bool speed = optimize_insn_for_speed_p ();
3304 rtx cop1;
3306 if (CONST_INT_P (op1)
3307 && GET_MODE (op0) != VOIDmode
3308 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3309 this_optab == umul_widen_optab))
3310 && CONST_INT_P (cop1)
3311 && (INTVAL (cop1) >= 0
3312 || HWI_COMPUTABLE_MODE_P (mode)))
3314 HOST_WIDE_INT coeff = INTVAL (cop1);
3315 int max_cost;
3316 enum mult_variant variant;
3317 struct algorithm algorithm;
3319 if (coeff == 0)
3320 return CONST0_RTX (mode);
3322 /* Special case powers of two. */
3323 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3325 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3326 return expand_shift (LSHIFT_EXPR, mode, op0,
3327 floor_log2 (coeff), target, unsignedp);
3330 /* Exclude cost of op0 from max_cost to match the cost
3331 calculation of the synth_mult. */
3332 max_cost = mul_widen_cost (speed, mode);
3333 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3334 max_cost))
3336 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3337 return expand_mult_const (mode, op0, coeff, target,
3338 &algorithm, variant);
3341 return expand_binop (mode, this_optab, op0, op1, target,
3342 unsignedp, OPTAB_LIB_WIDEN);
3345 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3346 replace division by D, and put the least significant N bits of the result
3347 in *MULTIPLIER_PTR and return the most significant bit.
3349 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3350 needed precision is in PRECISION (should be <= N).
3352 PRECISION should be as small as possible so this function can choose
3353 multiplier more freely.
3355 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3356 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3358 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3359 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
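/* Worked example, with N == PRECISION == 32 and a 64-bit HOST_WIDE_INT:
   for D == 5 this yields the multiplier 0xCCCCCCCD, *POST_SHIFT_PTR == 2
   and a zero return value, so for any 32-bit unsigned x
       x / 5 == ((unsigned HOST_WIDE_INT) x * 0xCCCCCCCD) >> 34
   i.e. the high half of the 32x32->64 product shifted right by 2.  */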
3361 unsigned HOST_WIDE_INT
3362 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3363 unsigned HOST_WIDE_INT *multiplier_ptr,
3364 int *post_shift_ptr, int *lgup_ptr)
3366 int lgup, post_shift;
3367 int pow, pow2;
3369 /* lgup = ceil(log2(divisor)); */
3370 lgup = ceil_log2 (d);
3372 gcc_assert (lgup <= n);
3374 pow = n + lgup;
3375 pow2 = n + lgup - precision;
3377 /* mlow = 2^(N + lgup)/d */
3378 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3379 wide_int mlow = wi::udiv_trunc (val, d);
3381 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3382 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3383 wide_int mhigh = wi::udiv_trunc (val, d);
3385 /* If precision == N, then mlow, mhigh exceed 2^N
3386 (but they do not exceed 2^(N+1)). */
3388 /* Reduce to lowest terms. */
3389 for (post_shift = lgup; post_shift > 0; post_shift--)
3391 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3392 HOST_BITS_PER_WIDE_INT);
3393 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3394 HOST_BITS_PER_WIDE_INT);
3395 if (ml_lo >= mh_lo)
3396 break;
3398 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3399 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3402 *post_shift_ptr = post_shift;
3403 *lgup_ptr = lgup;
3404 if (n < HOST_BITS_PER_WIDE_INT)
3406 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3407 *multiplier_ptr = mhigh.to_uhwi () & mask;
3408 return mhigh.to_uhwi () >= mask;
3410 else
3412 *multiplier_ptr = mhigh.to_uhwi ();
3413 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3417 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3418 congruent to 1 (mod 2**N). */
3420 static unsigned HOST_WIDE_INT
3421 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3423 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3425 /* The algorithm notes that the choice y = x satisfies
3426 x*y == 1 mod 2^3, since x is assumed odd.
3427 Each iteration doubles the number of bits of significance in y. */
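/* E.g. invert_mod2n (3, 8): y starts at 3 (3*3 == 9 == 1 mod 8); the first
   iteration gives y = 3*(2 - 9) & 0xff == 235 (3*235 == 705 == 1 mod 64);
   the second gives y = 235*(2 - 705) & 0xff == 171, and 3*171 == 513 == 1
   mod 256, the required inverse.  */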
3429 unsigned HOST_WIDE_INT mask;
3430 unsigned HOST_WIDE_INT y = x;
3431 int nbit = 3;
3433 mask = (n == HOST_BITS_PER_WIDE_INT
3434 ? ~(unsigned HOST_WIDE_INT) 0
3435 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3437 while (nbit < n)
3439 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3440 nbit *= 2;
3442 return y;
3445 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3446 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3447 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3448 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3449 become signed.
3451 The result is put in TARGET if that is convenient.
3453 MODE is the mode of operation. */
3455 rtx
3456 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3457 rtx op1, rtx target, int unsignedp)
3459 rtx tem;
3460 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3462 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3463 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3464 tem = expand_and (mode, tem, op1, NULL_RTX);
3465 adj_operand
3466 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3467 adj_operand);
3469 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3470 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3471 tem = expand_and (mode, tem, op0, NULL_RTX);
3472 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3473 target);
3475 return target;
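/* The two conditional corrections in the function above implement, modulo
   2**N for N-bit operands, the identity
     highpart_signed (a, b) == highpart_unsigned (a, b)
                               - (a < 0 ? b : 0) - (b < 0 ? a : 0)
   (with the signs of the corrections flipped for the unsigned direction);
   each arithmetic right shift by N - 1 yields an all-ones mask exactly when
   the corresponding operand is negative.  */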
3478 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3480 static rtx
3481 extract_high_half (machine_mode mode, rtx op)
3483 machine_mode wider_mode;
3485 if (mode == word_mode)
3486 return gen_highpart (mode, op);
3488 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3490 wider_mode = GET_MODE_WIDER_MODE (mode);
3491 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3492 GET_MODE_BITSIZE (mode), 0, 1);
3493 return convert_modes (mode, wider_mode, op, 0);
3496 /* Like expmed_mult_highpart, but only consider using a multiplication
3497 optab. OP1 is an rtx for the constant operand. */
3499 static rtx
3500 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3501 rtx target, int unsignedp, int max_cost)
3503 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3504 machine_mode wider_mode;
3505 optab moptab;
3506 rtx tem;
3507 int size;
3508 bool speed = optimize_insn_for_speed_p ();
3510 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3512 wider_mode = GET_MODE_WIDER_MODE (mode);
3513 size = GET_MODE_BITSIZE (mode);
3515 /* Firstly, try using a multiplication insn that only generates the needed
3516 high part of the product, and in the sign flavor of unsignedp. */
3517 if (mul_highpart_cost (speed, mode) < max_cost)
3519 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3520 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3521 unsignedp, OPTAB_DIRECT);
3522 if (tem)
3523 return tem;
3526 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3527 Need to adjust the result after the multiplication. */
3528 if (size - 1 < BITS_PER_WORD
3529 && (mul_highpart_cost (speed, mode)
3530 + 2 * shift_cost (speed, mode, size-1)
3531 + 4 * add_cost (speed, mode) < max_cost))
3533 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3534 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3535 unsignedp, OPTAB_DIRECT);
3536 if (tem)
3537 /* We used the wrong signedness. Adjust the result. */
3538 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3539 tem, unsignedp);
3542 /* Try widening multiplication. */
3543 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3544 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3545 && mul_widen_cost (speed, wider_mode) < max_cost)
3547 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3548 unsignedp, OPTAB_WIDEN);
3549 if (tem)
3550 return extract_high_half (mode, tem);
3553 /* Try widening the mode and performing a non-widening multiplication. */
3554 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3555 && size - 1 < BITS_PER_WORD
3556 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3557 < max_cost))
3559 rtx_insn *insns;
3560 rtx wop0, wop1;
3562 /* We need to widen the operands, for example to ensure the
3563 constant multiplier is correctly sign or zero extended.
3564 Use a sequence to clean-up any instructions emitted by
3565 the conversions if things don't work out. */
3566 start_sequence ();
3567 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3568 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3569 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3570 unsignedp, OPTAB_WIDEN);
3571 insns = get_insns ();
3572 end_sequence ();
3574 if (tem)
3576 emit_insn (insns);
3577 return extract_high_half (mode, tem);
3581 /* Try widening multiplication of opposite signedness, and adjust. */
3582 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3583 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3584 && size - 1 < BITS_PER_WORD
3585 && (mul_widen_cost (speed, wider_mode)
3586 + 2 * shift_cost (speed, mode, size-1)
3587 + 4 * add_cost (speed, mode) < max_cost))
3589 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3590 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3591 if (tem != 0)
3593 tem = extract_high_half (mode, tem);
3594 /* We used the wrong signedness. Adjust the result. */
3595 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3596 target, unsignedp);
3600 return 0;
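/* When only a widening multiply is available, the high part is just the
   upper word of the double-width product, followed by the equivalent of
   extract_high_half.  A minimal sketch for 32-bit operands, assuming
   <stdint.h>; the name is illustrative only.  */
#if 0
static uint32_t
example_umul_highpart_via_widening (uint32_t a, uint32_t b)
{
  /* Widening multiply, then keep the high word.  */
  return (uint32_t) (((uint64_t) a * b) >> 32);
}
#endif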
3603 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3604 putting the high half of the result in TARGET if that is convenient,
3605 and returning where the result is. If the operation cannot be performed,
3606 0 is returned.
3608 MODE is the mode of operation and result.
3610 UNSIGNEDP nonzero means unsigned multiply.
3612 MAX_COST is the total allowed cost for the expanded RTL. */
3614 static rtx
3615 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3616 rtx target, int unsignedp, int max_cost)
3618 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3619 unsigned HOST_WIDE_INT cnst1;
3620 int extra_cost;
3621 bool sign_adjust = false;
3622 enum mult_variant variant;
3623 struct algorithm alg;
3624 rtx tem;
3625 bool speed = optimize_insn_for_speed_p ();
3627 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3628 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3629 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3631 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3633 /* We can't optimize modes wider than BITS_PER_WORD.
3634 ??? We might be able to perform double-word arithmetic if
3635 mode == word_mode, however all the cost calculations in
3636 synth_mult etc. assume single-word operations. */
3637 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3638 return expmed_mult_highpart_optab (mode, op0, op1, target,
3639 unsignedp, max_cost);
3641 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3643 /* Check whether we try to multiply by a negative constant. */
3644 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3646 sign_adjust = true;
3647 extra_cost += add_cost (speed, mode);
3650 /* See whether shift/add multiplication is cheap enough. */
3651 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3652 max_cost - extra_cost))
3654 /* See whether the specialized multiplication optabs are
3655 cheaper than the shift/add version. */
3656 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3657 alg.cost.cost + extra_cost);
3658 if (tem)
3659 return tem;
3661 tem = convert_to_mode (wider_mode, op0, unsignedp);
3662 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3663 tem = extract_high_half (mode, tem);
3665 /* Adjust result for signedness. */
3666 if (sign_adjust)
3667 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3669 return tem;
3671 return expmed_mult_highpart_optab (mode, op0, op1, target,
3672 unsignedp, max_cost);
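/* The sign_adjust step above relies on the fact that multiplying by the
   unsigned image of a negative constant overshoots the signed high half
   by exactly OP0.  A minimal sketch for a 32-bit negative constant,
   assuming <stdint.h> and arithmetic right shift of negative values; the
   name is illustrative only.  */
#if 0
static uint32_t
example_signed_highpart_by_negative_const (int32_t a, int32_t c)  /* c < 0 */
{
  uint32_t cnst1 = (uint32_t) c;                  /* c + 2^32, like CNST1 above */
  uint32_t hi = (uint32_t) (((int64_t) a * cnst1) >> 32);
  return hi - (uint32_t) a;                       /* the sign_adjust subtraction */
}
#endif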
3676 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3678 static rtx
3679 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3681 rtx result, temp, shift;
3682 rtx_code_label *label;
3683 int logd;
3684 int prec = GET_MODE_PRECISION (mode);
3686 logd = floor_log2 (d);
3687 result = gen_reg_rtx (mode);
3689 /* Avoid conditional branches when they're expensive. */
3690 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3691 && optimize_insn_for_speed_p ())
3693 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3694 mode, 0, -1);
3695 if (signmask)
3697 HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3698 signmask = force_reg (mode, signmask);
3699 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3701 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3702 which instruction sequence to use. If logical right shifts
3703 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3704 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3706 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3707 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3708 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3709 > COSTS_N_INSNS (2)))
3711 temp = expand_binop (mode, xor_optab, op0, signmask,
3712 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3713 temp = expand_binop (mode, sub_optab, temp, signmask,
3714 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3715 temp = expand_binop (mode, and_optab, temp,
3716 gen_int_mode (masklow, mode),
3717 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3718 temp = expand_binop (mode, xor_optab, temp, signmask,
3719 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3720 temp = expand_binop (mode, sub_optab, temp, signmask,
3721 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3723 else
3725 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3726 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3727 signmask = force_reg (mode, signmask);
3729 temp = expand_binop (mode, add_optab, op0, signmask,
3730 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3731 temp = expand_binop (mode, and_optab, temp,
3732 gen_int_mode (masklow, mode),
3733 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3734 temp = expand_binop (mode, sub_optab, temp, signmask,
3735 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3737 return temp;
3741 /* Mask contains the mode's signbit and the significant bits of the
3742 modulus. By including the signbit in the operation, many targets
3743 can avoid an explicit compare operation in the following comparison
3744 against zero. */
3745 wide_int mask = wi::mask (logd, false, prec);
3746 mask = wi::set_bit (mask, prec - 1);
3748 temp = expand_binop (mode, and_optab, op0,
3749 immed_wide_int_const (mask, mode),
3750 result, 1, OPTAB_LIB_WIDEN);
3751 if (temp != result)
3752 emit_move_insn (result, temp);
3754 label = gen_label_rtx ();
3755 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3757 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3758 0, OPTAB_LIB_WIDEN);
3760 mask = wi::mask (logd, true, prec);
3761 temp = expand_binop (mode, ior_optab, temp,
3762 immed_wide_int_const (mask, mode),
3763 result, 1, OPTAB_LIB_WIDEN);
3764 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3765 0, OPTAB_LIB_WIDEN);
3766 if (temp != result)
3767 emit_move_insn (result, temp);
3768 emit_label (label);
3769 return result;
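/* For the branch-free path above, the net effect when logical shifts are
   cheap is a bias-and-mask sequence.  A minimal sketch for int32_t and
   D = 2^LOGD with 1 <= LOGD <= 31, assuming <stdint.h> and arithmetic
   right shift of negative values; the name is illustrative only.  */
#if 0
static int32_t
example_smod_pow2 (int32_t x, int logd)
{
  int32_t masklow = ((int32_t) 1 << logd) - 1;
  int32_t signmask = x >> 31;                     /* 0 or all-ones */
  /* Reduce the mask to the low LOGD bits, i.e. d-1 when X is negative.  */
  signmask = (int32_t) ((uint32_t) signmask >> (32 - logd));
  return ((x + signmask) & masklow) - signmask;
  /* e.g. example_smod_pow2 (-5, 2) == -1, matching C's -5 % 4.  */
}
#endif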
3772 /* Expand signed division of OP0 by a power of two D in mode MODE.
3773 This routine is only called for positive values of D. */
3775 static rtx
3776 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3778 rtx temp;
3779 rtx_code_label *label;
3780 int logd;
3782 logd = floor_log2 (d);
3784 if (d == 2
3785 && BRANCH_COST (optimize_insn_for_speed_p (),
3786 false) >= 1)
3788 temp = gen_reg_rtx (mode);
3789 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3790 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3791 0, OPTAB_LIB_WIDEN);
3792 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3795 if (HAVE_conditional_move
3796 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3798 rtx temp2;
3800 start_sequence ();
3801 temp2 = copy_to_mode_reg (mode, op0);
3802 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3803 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3804 temp = force_reg (mode, temp);
3806 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3807 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3808 mode, temp, temp2, mode, 0);
3809 if (temp2)
3811 rtx_insn *seq = get_insns ();
3812 end_sequence ();
3813 emit_insn (seq);
3814 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3816 end_sequence ();
3819 if (BRANCH_COST (optimize_insn_for_speed_p (),
3820 false) >= 2)
3822 int ushift = GET_MODE_BITSIZE (mode) - logd;
3824 temp = gen_reg_rtx (mode);
3825 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3826 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3827 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3828 > COSTS_N_INSNS (1))
3829 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3830 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3831 else
3832 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3833 ushift, NULL_RTX, 1);
3834 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3835 0, OPTAB_LIB_WIDEN);
3836 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3839 label = gen_label_rtx ();
3840 temp = copy_to_mode_reg (mode, op0);
3841 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3842 expand_inc (temp, gen_int_mode (d - 1, mode));
3843 emit_label (label);
3844 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
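/* All of the variants above compute the same thing: add D-1 to negative
   operands so that the final arithmetic shift truncates toward zero
   rather than toward minus infinity.  A minimal branch-free sketch for
   int32_t and D = 2^LOGD with 1 <= LOGD <= 31, assuming <stdint.h> and
   arithmetic right shift of negative values; the name is illustrative
   only.  */
#if 0
static int32_t
example_sdiv_pow2 (int32_t x, int logd)
{
  /* (x < 0 ? d - 1 : 0), computed without a branch.  */
  int32_t bias = (int32_t) ((uint32_t) (x >> 31) >> (32 - logd));
  return (x + bias) >> logd;
  /* e.g. example_sdiv_pow2 (-7, 1) == -3, matching C's -7 / 2.  */
}
#endif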
3847 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3848 if that is convenient, and returning where the result is.
3849 You may request either the quotient or the remainder as the result;
3850 specify REM_FLAG nonzero to get the remainder.
3852 CODE is the expression code for which kind of division this is;
3853 it controls how rounding is done. MODE is the machine mode to use.
3854 UNSIGNEDP nonzero means do unsigned division. */
3856 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3857 and then correct it by or'ing in missing high bits
3858 if result of ANDI is nonzero.
3859 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3860 This could optimize to a bfexts instruction.
3861 But C doesn't use these operations, so their optimizations are
3862 left for later. */
3863 /* ??? For modulo, we don't actually need the highpart of the first product,
3864 the low part will do nicely. And for small divisors, the second multiply
3865 can also be a low-part only multiply or even be completely left out.
3866 E.g. to calculate the remainder of a division by 3 with a 32 bit
3867 multiply, multiply with 0x55555556 and extract the upper two bits;
3868 the result is exact for inputs up to 0x1fffffff.
3869 The input range can be reduced by using cross-sum rules.
3870 For odd divisors >= 3, the following table gives right shift counts
3871 so that if a number is shifted by an integer multiple of the given
3872 amount, the remainder stays the same:
3873 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3874 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3875 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3876 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3877 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3879 Cross-sum rules for even numbers can be derived by leaving as many bits
3880 to the right alone as the divisor has zeros to the right.
3881 E.g. if x is an unsigned 32 bit number:
3882 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
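/* The divide-by-3 remark above can be checked directly: 0x55555556 * 3
   is 2^32 + 2, so the low word of x * 0x55555556 encodes the remainder
   in its top two bits.  A minimal sketch, assuming <stdint.h>; the name
   is illustrative only.  */
#if 0
static unsigned int
example_rem3 (uint32_t x)                /* exact for x <= 0x1fffffff */
{
  return (uint32_t) (x * 0x55555556u) >> 30;
}
#endif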
3886 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3887 rtx op0, rtx op1, rtx target, int unsignedp)
3889 machine_mode compute_mode;
3890 rtx tquotient;
3891 rtx quotient = 0, remainder = 0;
3892 rtx_insn *last;
3893 int size;
3894 rtx_insn *insn;
3895 optab optab1, optab2;
3896 int op1_is_constant, op1_is_pow2 = 0;
3897 int max_cost, extra_cost;
3898 static HOST_WIDE_INT last_div_const = 0;
3899 bool speed = optimize_insn_for_speed_p ();
3901 op1_is_constant = CONST_INT_P (op1);
3902 if (op1_is_constant)
3904 unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3905 if (unsignedp)
3906 ext_op1 &= GET_MODE_MASK (mode);
3907 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3908 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3912 This is the structure of expand_divmod:
3914 First comes code to fix up the operands so we can perform the operations
3915 correctly and efficiently.
3917 Second comes a switch statement with code specific for each rounding mode.
3918 For some special operands this code emits all RTL for the desired
3919 operation, for other cases, it generates only a quotient and stores it in
3920 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3921 to indicate that it has not done anything.
3923 Last comes code that finishes the operation. If QUOTIENT is set and
3924 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3925 QUOTIENT is not set, it is computed using trunc rounding.
3927 We try to generate special code for division and remainder when OP1 is a
3928 constant. If |OP1| = 2**n we can use shifts and some other fast
3929 operations. For other values of OP1, we compute a carefully selected
3930 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3931 by m.
3933 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3934 half of the product. Different strategies for generating the product are
3935 implemented in expmed_mult_highpart.
3937 If what we actually want is the remainder, we generate that by another
3938 by-constant multiplication and a subtraction. */
3940 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3941 code below will malfunction if we are, so check here and handle
3942 the special case if so. */
3943 if (op1 == const1_rtx)
3944 return rem_flag ? const0_rtx : op0;
3946 /* When dividing by -1, we could get an overflow.
3947 negv_optab can handle overflows. */
3948 if (! unsignedp && op1 == constm1_rtx)
3950 if (rem_flag)
3951 return const0_rtx;
3952 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3953 ? negv_optab : neg_optab, op0, target, 0);
3956 if (target
3957 /* Don't use the function value register as a target
3958 since we have to read it as well as write it,
3959 and function-inlining gets confused by this. */
3960 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3961 /* Don't clobber an operand while doing a multi-step calculation. */
3962 || ((rem_flag || op1_is_constant)
3963 && (reg_mentioned_p (target, op0)
3964 || (MEM_P (op0) && MEM_P (target))))
3965 || reg_mentioned_p (target, op1)
3966 || (MEM_P (op1) && MEM_P (target))))
3967 target = 0;
3969 /* Get the mode in which to perform this computation. Normally it will
3970 be MODE, but sometimes we can't do the desired operation in MODE.
3971 If so, pick a wider mode in which we can do the operation. Convert
3972 to that mode at the start to avoid repeated conversions.
3974 First see what operations we need. These depend on the expression
3975 we are evaluating. (We assume that divxx3 insns exist under the
3976 same conditions as modxx3 insns and that these insns don't normally
3977 fail. If these assumptions are not correct, we may generate less
3978 efficient code in some cases.)
3980 Then see if we find a mode in which we can open-code that operation
3981 (either a division, modulus, or shift). Finally, check for the smallest
3982 mode for which we can do the operation with a library call. */
3984 /* We might want to refine this now that we have division-by-constant
3985 optimization. Since expmed_mult_highpart tries so many variants, it is
3986 not straightforward to generalize this. Maybe we should make an array
3987 of possible modes in init_expmed? Save this for GCC 2.7. */
3989 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3990 ? (unsignedp ? lshr_optab : ashr_optab)
3991 : (unsignedp ? udiv_optab : sdiv_optab));
3992 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3993 ? optab1
3994 : (unsignedp ? udivmod_optab : sdivmod_optab));
3996 for (compute_mode = mode; compute_mode != VOIDmode;
3997 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3998 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3999 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4000 break;
4002 if (compute_mode == VOIDmode)
4003 for (compute_mode = mode; compute_mode != VOIDmode;
4004 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4005 if (optab_libfunc (optab1, compute_mode)
4006 || optab_libfunc (optab2, compute_mode))
4007 break;
4009 /* If we still couldn't find a mode, use MODE, but expand_binop will
4010 probably die. */
4011 if (compute_mode == VOIDmode)
4012 compute_mode = mode;
4014 if (target && GET_MODE (target) == compute_mode)
4015 tquotient = target;
4016 else
4017 tquotient = gen_reg_rtx (compute_mode);
4019 size = GET_MODE_BITSIZE (compute_mode);
4020 #if 0
4021 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4022 (mode), and thereby get better code when OP1 is a constant. Do that
4023 later. It will require going over all usages of SIZE below. */
4024 size = GET_MODE_BITSIZE (mode);
4025 #endif
4027 /* Only deduct something for a REM if the last divide done was
4028 for a different constant. Then set the constant of the last
4029 divide. */
4030 max_cost = (unsignedp
4031 ? udiv_cost (speed, compute_mode)
4032 : sdiv_cost (speed, compute_mode));
4033 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4034 && INTVAL (op1) == last_div_const))
4035 max_cost -= (mul_cost (speed, compute_mode)
4036 + add_cost (speed, compute_mode));
4038 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4040 /* Now convert to the best mode to use. */
4041 if (compute_mode != mode)
4043 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4044 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4046 /* convert_modes may have placed op1 into a register, so we
4047 must recompute the following. */
4048 op1_is_constant = CONST_INT_P (op1);
4049 op1_is_pow2 = (op1_is_constant
4050 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4051 || (! unsignedp
4052 && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4055 /* If one of the operands is a volatile MEM, copy it into a register. */
4057 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4058 op0 = force_reg (compute_mode, op0);
4059 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4060 op1 = force_reg (compute_mode, op1);
4062 /* If we need the remainder or if OP1 is constant, we need to
4063 put OP0 in a register in case it has any queued subexpressions. */
4064 if (rem_flag || op1_is_constant)
4065 op0 = force_reg (compute_mode, op0);
4067 last = get_last_insn ();
4069 /* Promote floor rounding to trunc rounding for unsigned operations. */
4070 if (unsignedp)
4072 if (code == FLOOR_DIV_EXPR)
4073 code = TRUNC_DIV_EXPR;
4074 if (code == FLOOR_MOD_EXPR)
4075 code = TRUNC_MOD_EXPR;
4076 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4077 code = TRUNC_DIV_EXPR;
4080 if (op1 != const0_rtx)
4081 switch (code)
4083 case TRUNC_MOD_EXPR:
4084 case TRUNC_DIV_EXPR:
4085 if (op1_is_constant)
4087 if (unsignedp)
4089 unsigned HOST_WIDE_INT mh, ml;
4090 int pre_shift, post_shift;
4091 int dummy;
4092 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4093 & GET_MODE_MASK (compute_mode));
4095 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4097 pre_shift = floor_log2 (d);
4098 if (rem_flag)
4100 unsigned HOST_WIDE_INT mask
4101 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4102 remainder
4103 = expand_binop (compute_mode, and_optab, op0,
4104 gen_int_mode (mask, compute_mode),
4105 remainder, 1,
4106 OPTAB_LIB_WIDEN);
4107 if (remainder)
4108 return gen_lowpart (mode, remainder);
4110 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4111 pre_shift, tquotient, 1);
4113 else if (size <= HOST_BITS_PER_WIDE_INT)
4115 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4117 /* Most significant bit of divisor is set; emit an scc
4118 insn. */
4119 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4120 compute_mode, 1, 1);
4122 else
4124 /* Find a suitable multiplier and right shift count
4125 instead of multiplying with D. */
4127 mh = choose_multiplier (d, size, size,
4128 &ml, &post_shift, &dummy);
4130 /* If the suggested multiplier is more than SIZE bits,
4131 we can do better for even divisors, using an
4132 initial right shift. */
4133 if (mh != 0 && (d & 1) == 0)
4135 pre_shift = floor_log2 (d & -d);
4136 mh = choose_multiplier (d >> pre_shift, size,
4137 size - pre_shift,
4138 &ml, &post_shift, &dummy);
4139 gcc_assert (!mh);
4141 else
4142 pre_shift = 0;
4144 if (mh != 0)
4146 rtx t1, t2, t3, t4;
4148 if (post_shift - 1 >= BITS_PER_WORD)
4149 goto fail1;
4151 extra_cost
4152 = (shift_cost (speed, compute_mode, post_shift - 1)
4153 + shift_cost (speed, compute_mode, 1)
4154 + 2 * add_cost (speed, compute_mode));
4155 t1 = expmed_mult_highpart
4156 (compute_mode, op0,
4157 gen_int_mode (ml, compute_mode),
4158 NULL_RTX, 1, max_cost - extra_cost);
4159 if (t1 == 0)
4160 goto fail1;
4161 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4162 op0, t1),
4163 NULL_RTX);
4164 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4165 t2, 1, NULL_RTX, 1);
4166 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4167 t1, t3),
4168 NULL_RTX);
4169 quotient = expand_shift
4170 (RSHIFT_EXPR, compute_mode, t4,
4171 post_shift - 1, tquotient, 1);
4173 else
4175 rtx t1, t2;
4177 if (pre_shift >= BITS_PER_WORD
4178 || post_shift >= BITS_PER_WORD)
4179 goto fail1;
4181 t1 = expand_shift
4182 (RSHIFT_EXPR, compute_mode, op0,
4183 pre_shift, NULL_RTX, 1);
4184 extra_cost
4185 = (shift_cost (speed, compute_mode, pre_shift)
4186 + shift_cost (speed, compute_mode, post_shift));
4187 t2 = expmed_mult_highpart
4188 (compute_mode, t1,
4189 gen_int_mode (ml, compute_mode),
4190 NULL_RTX, 1, max_cost - extra_cost);
4191 if (t2 == 0)
4192 goto fail1;
4193 quotient = expand_shift
4194 (RSHIFT_EXPR, compute_mode, t2,
4195 post_shift, tquotient, 1);
4199 else /* Too wide mode to use tricky code */
4200 break;
4202 insn = get_last_insn ();
4203 if (insn != last)
4204 set_dst_reg_note (insn, REG_EQUAL,
4205 gen_rtx_UDIV (compute_mode, op0, op1),
4206 quotient);
4208 else /* TRUNC_DIV, signed */
4210 unsigned HOST_WIDE_INT ml;
4211 int lgup, post_shift;
4212 rtx mlr;
4213 HOST_WIDE_INT d = INTVAL (op1);
4214 unsigned HOST_WIDE_INT abs_d;
4216 /* Since d might be INT_MIN, we have to cast to
4217 unsigned HOST_WIDE_INT before negating to avoid
4218 undefined signed overflow. */
4219 abs_d = (d >= 0
4220 ? (unsigned HOST_WIDE_INT) d
4221 : - (unsigned HOST_WIDE_INT) d);
4223 /* n rem d = n rem -d */
4224 if (rem_flag && d < 0)
4226 d = abs_d;
4227 op1 = gen_int_mode (abs_d, compute_mode);
4230 if (d == 1)
4231 quotient = op0;
4232 else if (d == -1)
4233 quotient = expand_unop (compute_mode, neg_optab, op0,
4234 tquotient, 0);
4235 else if (HOST_BITS_PER_WIDE_INT >= size
4236 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4238 /* This case is not handled correctly below. */
4239 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4240 compute_mode, 1, 1);
4241 if (quotient == 0)
4242 goto fail1;
4244 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4245 && (rem_flag
4246 ? smod_pow2_cheap (speed, compute_mode)
4247 : sdiv_pow2_cheap (speed, compute_mode))
4248 /* We assume that cheap metric is true if the
4249 optab has an expander for this mode. */
4250 && ((optab_handler ((rem_flag ? smod_optab
4251 : sdiv_optab),
4252 compute_mode)
4253 != CODE_FOR_nothing)
4254 || (optab_handler (sdivmod_optab,
4255 compute_mode)
4256 != CODE_FOR_nothing)))
4258 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4260 if (rem_flag)
4262 remainder = expand_smod_pow2 (compute_mode, op0, d);
4263 if (remainder)
4264 return gen_lowpart (mode, remainder);
4267 if (sdiv_pow2_cheap (speed, compute_mode)
4268 && ((optab_handler (sdiv_optab, compute_mode)
4269 != CODE_FOR_nothing)
4270 || (optab_handler (sdivmod_optab, compute_mode)
4271 != CODE_FOR_nothing)))
4272 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4273 compute_mode, op0,
4274 gen_int_mode (abs_d,
4275 compute_mode),
4276 NULL_RTX, 0);
4277 else
4278 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4280 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4281 negate the quotient. */
4282 if (d < 0)
4284 insn = get_last_insn ();
4285 if (insn != last
4286 && abs_d < ((unsigned HOST_WIDE_INT) 1
4287 << (HOST_BITS_PER_WIDE_INT - 1)))
4288 set_dst_reg_note (insn, REG_EQUAL,
4289 gen_rtx_DIV (compute_mode, op0,
4290 gen_int_mode
4291 (abs_d,
4292 compute_mode)),
4293 quotient);
4295 quotient = expand_unop (compute_mode, neg_optab,
4296 quotient, quotient, 0);
4299 else if (size <= HOST_BITS_PER_WIDE_INT)
4301 choose_multiplier (abs_d, size, size - 1,
4302 &ml, &post_shift, &lgup);
4303 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4305 rtx t1, t2, t3;
4307 if (post_shift >= BITS_PER_WORD
4308 || size - 1 >= BITS_PER_WORD)
4309 goto fail1;
4311 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4312 + shift_cost (speed, compute_mode, size - 1)
4313 + add_cost (speed, compute_mode));
4314 t1 = expmed_mult_highpart
4315 (compute_mode, op0, gen_int_mode (ml, compute_mode),
4316 NULL_RTX, 0, max_cost - extra_cost);
4317 if (t1 == 0)
4318 goto fail1;
4319 t2 = expand_shift
4320 (RSHIFT_EXPR, compute_mode, t1,
4321 post_shift, NULL_RTX, 0);
4322 t3 = expand_shift
4323 (RSHIFT_EXPR, compute_mode, op0,
4324 size - 1, NULL_RTX, 0);
4325 if (d < 0)
4326 quotient
4327 = force_operand (gen_rtx_MINUS (compute_mode,
4328 t3, t2),
4329 tquotient);
4330 else
4331 quotient
4332 = force_operand (gen_rtx_MINUS (compute_mode,
4333 t2, t3),
4334 tquotient);
4336 else
4338 rtx t1, t2, t3, t4;
4340 if (post_shift >= BITS_PER_WORD
4341 || size - 1 >= BITS_PER_WORD)
4342 goto fail1;
4344 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4345 mlr = gen_int_mode (ml, compute_mode);
4346 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4347 + shift_cost (speed, compute_mode, size - 1)
4348 + 2 * add_cost (speed, compute_mode));
4349 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4350 NULL_RTX, 0,
4351 max_cost - extra_cost);
4352 if (t1 == 0)
4353 goto fail1;
4354 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4355 t1, op0),
4356 NULL_RTX);
4357 t3 = expand_shift
4358 (RSHIFT_EXPR, compute_mode, t2,
4359 post_shift, NULL_RTX, 0);
4360 t4 = expand_shift
4361 (RSHIFT_EXPR, compute_mode, op0,
4362 size - 1, NULL_RTX, 0);
4363 if (d < 0)
4364 quotient
4365 = force_operand (gen_rtx_MINUS (compute_mode,
4366 t4, t3),
4367 tquotient);
4368 else
4369 quotient
4370 = force_operand (gen_rtx_MINUS (compute_mode,
4371 t3, t4),
4372 tquotient);
4375 else /* Too wide mode to use tricky code */
4376 break;
4378 insn = get_last_insn ();
4379 if (insn != last)
4380 set_dst_reg_note (insn, REG_EQUAL,
4381 gen_rtx_DIV (compute_mode, op0, op1),
4382 quotient);
4384 break;
4386 fail1:
4387 delete_insns_since (last);
4388 break;
4390 case FLOOR_DIV_EXPR:
4391 case FLOOR_MOD_EXPR:
4392 /* We will come here only for signed operations. */
4393 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4395 unsigned HOST_WIDE_INT mh, ml;
4396 int pre_shift, lgup, post_shift;
4397 HOST_WIDE_INT d = INTVAL (op1);
4399 if (d > 0)
4401 /* We could just as easily deal with negative constants here,
4402 but it does not seem worth the trouble for GCC 2.6. */
4403 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4405 pre_shift = floor_log2 (d);
4406 if (rem_flag)
4408 unsigned HOST_WIDE_INT mask
4409 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4410 remainder = expand_binop
4411 (compute_mode, and_optab, op0,
4412 gen_int_mode (mask, compute_mode),
4413 remainder, 0, OPTAB_LIB_WIDEN);
4414 if (remainder)
4415 return gen_lowpart (mode, remainder);
4417 quotient = expand_shift
4418 (RSHIFT_EXPR, compute_mode, op0,
4419 pre_shift, tquotient, 0);
4421 else
4423 rtx t1, t2, t3, t4;
4425 mh = choose_multiplier (d, size, size - 1,
4426 &ml, &post_shift, &lgup);
4427 gcc_assert (!mh);
4429 if (post_shift < BITS_PER_WORD
4430 && size - 1 < BITS_PER_WORD)
4432 t1 = expand_shift
4433 (RSHIFT_EXPR, compute_mode, op0,
4434 size - 1, NULL_RTX, 0);
4435 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4436 NULL_RTX, 0, OPTAB_WIDEN);
4437 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4438 + shift_cost (speed, compute_mode, size - 1)
4439 + 2 * add_cost (speed, compute_mode));
4440 t3 = expmed_mult_highpart
4441 (compute_mode, t2, gen_int_mode (ml, compute_mode),
4442 NULL_RTX, 1, max_cost - extra_cost);
4443 if (t3 != 0)
4445 t4 = expand_shift
4446 (RSHIFT_EXPR, compute_mode, t3,
4447 post_shift, NULL_RTX, 1);
4448 quotient = expand_binop (compute_mode, xor_optab,
4449 t4, t1, tquotient, 0,
4450 OPTAB_WIDEN);
4455 else
4457 rtx nsign, t1, t2, t3, t4;
4458 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4459 op0, constm1_rtx), NULL_RTX);
4460 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4461 0, OPTAB_WIDEN);
4462 nsign = expand_shift
4463 (RSHIFT_EXPR, compute_mode, t2,
4464 size - 1, NULL_RTX, 0);
4465 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4466 NULL_RTX);
4467 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4468 NULL_RTX, 0);
4469 if (t4)
4471 rtx t5;
4472 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4473 NULL_RTX, 0);
4474 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4475 t4, t5),
4476 tquotient);
4481 if (quotient != 0)
4482 break;
4483 delete_insns_since (last);
4485 /* Try using an instruction that produces both the quotient and
4486 remainder, using truncation. We can easily compensate the quotient
4487 or remainder to get floor rounding, once we have the remainder.
4488 Notice that we compute also the final remainder value here,
4489 and return the result right away. */
4490 if (target == 0 || GET_MODE (target) != compute_mode)
4491 target = gen_reg_rtx (compute_mode);
4493 if (rem_flag)
4495 remainder
4496 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4497 quotient = gen_reg_rtx (compute_mode);
4499 else
4501 quotient
4502 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4503 remainder = gen_reg_rtx (compute_mode);
4506 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4507 quotient, remainder, 0))
4509 /* This could be computed with a branch-less sequence.
4510 Save that for later. */
4511 rtx tem;
4512 rtx_code_label *label = gen_label_rtx ();
4513 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4514 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4515 NULL_RTX, 0, OPTAB_WIDEN);
4516 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4517 expand_dec (quotient, const1_rtx);
4518 expand_inc (remainder, op1);
4519 emit_label (label);
4520 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4523 /* No luck with division elimination or divmod. Have to do it
4524 by conditionally adjusting op0 *and* the result. */
4526 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4527 rtx adjusted_op0;
4528 rtx tem;
4530 quotient = gen_reg_rtx (compute_mode);
4531 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4532 label1 = gen_label_rtx ();
4533 label2 = gen_label_rtx ();
4534 label3 = gen_label_rtx ();
4535 label4 = gen_label_rtx ();
4536 label5 = gen_label_rtx ();
4537 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4538 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4539 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4540 quotient, 0, OPTAB_LIB_WIDEN);
4541 if (tem != quotient)
4542 emit_move_insn (quotient, tem);
4543 emit_jump_insn (gen_jump (label5));
4544 emit_barrier ();
4545 emit_label (label1);
4546 expand_inc (adjusted_op0, const1_rtx);
4547 emit_jump_insn (gen_jump (label4));
4548 emit_barrier ();
4549 emit_label (label2);
4550 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4551 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4552 quotient, 0, OPTAB_LIB_WIDEN);
4553 if (tem != quotient)
4554 emit_move_insn (quotient, tem);
4555 emit_jump_insn (gen_jump (label5));
4556 emit_barrier ();
4557 emit_label (label3);
4558 expand_dec (adjusted_op0, const1_rtx);
4559 emit_label (label4);
4560 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4561 quotient, 0, OPTAB_LIB_WIDEN);
4562 if (tem != quotient)
4563 emit_move_insn (quotient, tem);
4564 expand_dec (quotient, const1_rtx);
4565 emit_label (label5);
4567 break;
4569 case CEIL_DIV_EXPR:
4570 case CEIL_MOD_EXPR:
4571 if (unsignedp)
4573 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4575 rtx t1, t2, t3;
4576 unsigned HOST_WIDE_INT d = INTVAL (op1);
4577 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4578 floor_log2 (d), tquotient, 1);
4579 t2 = expand_binop (compute_mode, and_optab, op0,
4580 gen_int_mode (d - 1, compute_mode),
4581 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4582 t3 = gen_reg_rtx (compute_mode);
4583 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4584 compute_mode, 1, 1);
4585 if (t3 == 0)
4587 rtx_code_label *lab;
4588 lab = gen_label_rtx ();
4589 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4590 expand_inc (t1, const1_rtx);
4591 emit_label (lab);
4592 quotient = t1;
4594 else
4595 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4596 t1, t3),
4597 tquotient);
4598 break;
4601 /* Try using an instruction that produces both the quotient and
4602 remainder, using truncation. We can easily compensate the
4603 quotient or remainder to get ceiling rounding, once we have the
4604 remainder. Notice that we compute also the final remainder
4605 value here, and return the result right away. */
4606 if (target == 0 || GET_MODE (target) != compute_mode)
4607 target = gen_reg_rtx (compute_mode);
4609 if (rem_flag)
4611 remainder = (REG_P (target)
4612 ? target : gen_reg_rtx (compute_mode));
4613 quotient = gen_reg_rtx (compute_mode);
4615 else
4617 quotient = (REG_P (target)
4618 ? target : gen_reg_rtx (compute_mode));
4619 remainder = gen_reg_rtx (compute_mode);
4622 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4623 remainder, 1))
4625 /* This could be computed with a branch-less sequence.
4626 Save that for later. */
4627 rtx_code_label *label = gen_label_rtx ();
4628 do_cmp_and_jump (remainder, const0_rtx, EQ,
4629 compute_mode, label);
4630 expand_inc (quotient, const1_rtx);
4631 expand_dec (remainder, op1);
4632 emit_label (label);
4633 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4636 /* No luck with division elimination or divmod. Have to do it
4637 by conditionally adjusting op0 *and* the result. */
4639 rtx_code_label *label1, *label2;
4640 rtx adjusted_op0, tem;
4642 quotient = gen_reg_rtx (compute_mode);
4643 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4644 label1 = gen_label_rtx ();
4645 label2 = gen_label_rtx ();
4646 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4647 compute_mode, label1);
4648 emit_move_insn (quotient, const0_rtx);
4649 emit_jump_insn (gen_jump (label2));
4650 emit_barrier ();
4651 emit_label (label1);
4652 expand_dec (adjusted_op0, const1_rtx);
4653 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4654 quotient, 1, OPTAB_LIB_WIDEN);
4655 if (tem != quotient)
4656 emit_move_insn (quotient, tem);
4657 expand_inc (quotient, const1_rtx);
4658 emit_label (label2);
4661 else /* signed */
4663 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4664 && INTVAL (op1) >= 0)
4666 /* This is extremely similar to the code for the unsigned case
4667 above. For 2.7 we should merge these variants, but for
4668 2.6.1 I don't want to touch the code for unsigned since that
4669 gets used in C. The signed case will only be used by other
4670 languages (Ada). */
4672 rtx t1, t2, t3;
4673 unsigned HOST_WIDE_INT d = INTVAL (op1);
4674 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4675 floor_log2 (d), tquotient, 0);
4676 t2 = expand_binop (compute_mode, and_optab, op0,
4677 gen_int_mode (d - 1, compute_mode),
4678 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4679 t3 = gen_reg_rtx (compute_mode);
4680 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4681 compute_mode, 1, 1);
4682 if (t3 == 0)
4684 rtx_code_label *lab;
4685 lab = gen_label_rtx ();
4686 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4687 expand_inc (t1, const1_rtx);
4688 emit_label (lab);
4689 quotient = t1;
4691 else
4692 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4693 t1, t3),
4694 tquotient);
4695 break;
4698 /* Try using an instruction that produces both the quotient and
4699 remainder, using truncation. We can easily compensate the
4700 quotient or remainder to get ceiling rounding, once we have the
4701 remainder. Notice that we compute also the final remainder
4702 value here, and return the result right away. */
4703 if (target == 0 || GET_MODE (target) != compute_mode)
4704 target = gen_reg_rtx (compute_mode);
4705 if (rem_flag)
4707 remainder = (REG_P
4708 ? target : gen_reg_rtx (compute_mode));
4709 quotient = gen_reg_rtx (compute_mode);
4711 else
4713 quotient = (REG_P (target)
4714 ? target : gen_reg_rtx (compute_mode));
4715 remainder = gen_reg_rtx (compute_mode);
4718 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4719 remainder, 0))
4721 /* This could be computed with a branch-less sequence.
4722 Save that for later. */
4723 rtx tem;
4724 rtx_code_label *label = gen_label_rtx ();
4725 do_cmp_and_jump (remainder, const0_rtx, EQ,
4726 compute_mode, label);
4727 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4728 NULL_RTX, 0, OPTAB_WIDEN);
4729 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4730 expand_inc (quotient, const1_rtx);
4731 expand_dec (remainder, op1);
4732 emit_label (label);
4733 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4736 /* No luck with division elimination or divmod. Have to do it
4737 by conditionally adjusting op0 *and* the result. */
4739 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4740 rtx adjusted_op0;
4741 rtx tem;
4743 quotient = gen_reg_rtx (compute_mode);
4744 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4745 label1 = gen_label_rtx ();
4746 label2 = gen_label_rtx ();
4747 label3 = gen_label_rtx ();
4748 label4 = gen_label_rtx ();
4749 label5 = gen_label_rtx ();
4750 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4751 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4752 compute_mode, label1);
4753 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4754 quotient, 0, OPTAB_LIB_WIDEN);
4755 if (tem != quotient)
4756 emit_move_insn (quotient, tem);
4757 emit_jump_insn (gen_jump (label5));
4758 emit_barrier ();
4759 emit_label (label1);
4760 expand_dec (adjusted_op0, const1_rtx);
4761 emit_jump_insn (gen_jump (label4));
4762 emit_barrier ();
4763 emit_label (label2);
4764 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4765 compute_mode, label3);
4766 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4767 quotient, 0, OPTAB_LIB_WIDEN);
4768 if (tem != quotient)
4769 emit_move_insn (quotient, tem);
4770 emit_jump_insn (gen_jump (label5));
4771 emit_barrier ();
4772 emit_label (label3);
4773 expand_inc (adjusted_op0, const1_rtx);
4774 emit_label (label4);
4775 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4776 quotient, 0, OPTAB_LIB_WIDEN);
4777 if (tem != quotient)
4778 emit_move_insn (quotient, tem);
4779 expand_inc (quotient, const1_rtx);
4780 emit_label (label5);
4783 break;
4785 case EXACT_DIV_EXPR:
4786 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4788 HOST_WIDE_INT d = INTVAL (op1);
4789 unsigned HOST_WIDE_INT ml;
4790 int pre_shift;
4791 rtx t1;
4793 pre_shift = floor_log2 (d & -d);
4794 ml = invert_mod2n (d >> pre_shift, size);
4795 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4796 pre_shift, NULL_RTX, unsignedp);
4797 quotient = expand_mult (compute_mode, t1,
4798 gen_int_mode (ml, compute_mode),
4799 NULL_RTX, 1);
4801 insn = get_last_insn ();
4802 set_dst_reg_note (insn, REG_EQUAL,
4803 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4804 compute_mode, op0, op1),
4805 quotient);
4807 break;
4809 case ROUND_DIV_EXPR:
4810 case ROUND_MOD_EXPR:
4811 if (unsignedp)
4813 rtx tem;
4814 rtx_code_label *label;
4815 label = gen_label_rtx ();
4816 quotient = gen_reg_rtx (compute_mode);
4817 remainder = gen_reg_rtx (compute_mode);
4818 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4820 rtx tem;
4821 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4822 quotient, 1, OPTAB_LIB_WIDEN);
4823 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4824 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4825 remainder, 1, OPTAB_LIB_WIDEN);
4827 tem = plus_constant (compute_mode, op1, -1);
4828 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4829 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4830 expand_inc (quotient, const1_rtx);
4831 expand_dec (remainder, op1);
4832 emit_label (label);
4834 else
4836 rtx abs_rem, abs_op1, tem, mask;
4837 rtx_code_label *label;
4838 label = gen_label_rtx ();
4839 quotient = gen_reg_rtx (compute_mode);
4840 remainder = gen_reg_rtx (compute_mode);
4841 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4843 rtx tem;
4844 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4845 quotient, 0, OPTAB_LIB_WIDEN);
4846 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4847 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4848 remainder, 0, OPTAB_LIB_WIDEN);
4850 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4851 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4852 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4853 1, NULL_RTX, 1);
4854 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4855 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4856 NULL_RTX, 0, OPTAB_WIDEN);
4857 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4858 size - 1, NULL_RTX, 0);
4859 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4860 NULL_RTX, 0, OPTAB_WIDEN);
4861 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4862 NULL_RTX, 0, OPTAB_WIDEN);
4863 expand_inc (quotient, tem);
4864 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4865 NULL_RTX, 0, OPTAB_WIDEN);
4866 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4867 NULL_RTX, 0, OPTAB_WIDEN);
4868 expand_dec (remainder, tem);
4869 emit_label (label);
4871 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4873 default:
4874 gcc_unreachable ();
4877 if (quotient == 0)
4879 if (target && GET_MODE (target) != compute_mode)
4880 target = 0;
4882 if (rem_flag)
4884 /* Try to produce the remainder without producing the quotient.
4885 If we seem to have a divmod pattern that does not require widening,
4886 don't try widening here. We should really have a WIDEN argument
4887 to expand_twoval_binop, since what we'd really like to do here is
4888 1) try a mod insn in compute_mode
4889 2) try a divmod insn in compute_mode
4890 3) try a div insn in compute_mode and multiply-subtract to get
4891 remainder
4892 4) try the same things with widening allowed. */
4893 remainder
4894 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4895 op0, op1, target,
4896 unsignedp,
4897 ((optab_handler (optab2, compute_mode)
4898 != CODE_FOR_nothing)
4899 ? OPTAB_DIRECT : OPTAB_WIDEN));
4900 if (remainder == 0)
4902 /* No luck there. Can we do remainder and divide at once
4903 without a library call? */
4904 remainder = gen_reg_rtx (compute_mode);
4905 if (! expand_twoval_binop ((unsignedp
4906 ? udivmod_optab
4907 : sdivmod_optab),
4908 op0, op1,
4909 NULL_RTX, remainder, unsignedp))
4910 remainder = 0;
4913 if (remainder)
4914 return gen_lowpart (mode, remainder);
4917 /* Produce the quotient. Try a quotient insn, but not a library call.
4918 If we have a divmod in this mode, use it in preference to widening
4919 the div (for this test we assume it will not fail). Note that optab2
4920 is set to the one of the two optabs that the call below will use. */
4921 quotient
4922 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4923 op0, op1, rem_flag ? NULL_RTX : target,
4924 unsignedp,
4925 ((optab_handler (optab2, compute_mode)
4926 != CODE_FOR_nothing)
4927 ? OPTAB_DIRECT : OPTAB_WIDEN));
4929 if (quotient == 0)
4931 /* No luck there. Try a quotient-and-remainder insn,
4932 keeping the quotient alone. */
4933 quotient = gen_reg_rtx (compute_mode);
4934 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4935 op0, op1,
4936 quotient, NULL_RTX, unsignedp))
4938 quotient = 0;
4939 if (! rem_flag)
4940 /* Still no luck. If we are not computing the remainder,
4941 use a library call for the quotient. */
4942 quotient = sign_expand_binop (compute_mode,
4943 udiv_optab, sdiv_optab,
4944 op0, op1, target,
4945 unsignedp, OPTAB_LIB_WIDEN);
4950 if (rem_flag)
4952 if (target && GET_MODE (target) != compute_mode)
4953 target = 0;
4955 if (quotient == 0)
4957 /* No divide instruction either. Use library for remainder. */
4958 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4959 op0, op1, target,
4960 unsignedp, OPTAB_LIB_WIDEN);
4961 /* No remainder function. Try a quotient-and-remainder
4962 function, keeping the remainder. */
4963 if (!remainder)
4965 remainder = gen_reg_rtx (compute_mode);
4966 if (!expand_twoval_binop_libfunc
4967 (unsignedp ? udivmod_optab : sdivmod_optab,
4968 op0, op1,
4969 NULL_RTX, remainder,
4970 unsignedp ? UMOD : MOD))
4971 remainder = NULL_RTX;
4974 else
4976 /* We divided. Now finish doing X - Y * (X / Y). */
4977 remainder = expand_mult (compute_mode, quotient, op1,
4978 NULL_RTX, unsignedp);
4979 remainder = expand_binop (compute_mode, sub_optab, op0,
4980 remainder, target, unsignedp,
4981 OPTAB_LIB_WIDEN);
4985 return gen_lowpart (mode, rem_flag ? remainder : quotient);
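/* The mh != 0 branch of the unsigned TRUNC_DIV_EXPR case above is the
   classic round-up-multiplier recipe.  A minimal sketch for division by
   7 in 32 bits, assuming <stdint.h>; the constant and shifts follow the
   standard derivation and should match what choose_multiplier returns
   for (7, 32, 32).  The name is illustrative only.  */
#if 0
static uint32_t
example_udiv7 (uint32_t x)
{
  /* High half of x times the low 32 bits of the 33-bit multiplier.  */
  uint32_t t1 = (uint32_t) (((uint64_t) x * 0x24924925u) >> 32);
  /* The t2/t3/t4 fix-up from the mh != 0 case, then post_shift - 1.  */
  return (t1 + ((x - t1) >> 1)) >> 2;             /* == x / 7 for all x */
}
#endif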
4988 /* Return a tree node with data type TYPE, describing the value of X.
4989 Usually this is a VAR_DECL, if there is no obvious better choice.
4990 X may be an expression, however we only support those expressions
4991 generated by loop.c. */
4993 tree
4994 make_tree (tree type, rtx x)
4996 tree t;
4998 switch (GET_CODE (x))
5000 case CONST_INT:
5001 case CONST_WIDE_INT:
5002 t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
5003 return t;
5005 case CONST_DOUBLE:
5006 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5007 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5008 t = wide_int_to_tree (type,
5009 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5010 HOST_BITS_PER_WIDE_INT * 2));
5011 else
5013 REAL_VALUE_TYPE d;
5015 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5016 t = build_real (type, d);
5019 return t;
5021 case CONST_VECTOR:
5023 int units = CONST_VECTOR_NUNITS (x);
5024 tree itype = TREE_TYPE (type);
5025 tree *elts;
5026 int i;
5028 /* Build a tree with vector elements. */
5029 elts = XALLOCAVEC (tree, units);
5030 for (i = units - 1; i >= 0; --i)
5032 rtx elt = CONST_VECTOR_ELT (x, i);
5033 elts[i] = make_tree (itype, elt);
5036 return build_vector (type, elts);
5039 case PLUS:
5040 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5041 make_tree (type, XEXP (x, 1)));
5043 case MINUS:
5044 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5045 make_tree (type, XEXP (x, 1)));
5047 case NEG:
5048 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5050 case MULT:
5051 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5052 make_tree (type, XEXP (x, 1)));
5054 case ASHIFT:
5055 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5056 make_tree (type, XEXP (x, 1)));
5058 case LSHIFTRT:
5059 t = unsigned_type_for (type);
5060 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5061 make_tree (t, XEXP (x, 0)),
5062 make_tree (type, XEXP (x, 1))));
5064 case ASHIFTRT:
5065 t = signed_type_for (type);
5066 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5067 make_tree (t, XEXP (x, 0)),
5068 make_tree (type, XEXP (x, 1))));
5070 case DIV:
5071 if (TREE_CODE (type) != REAL_TYPE)
5072 t = signed_type_for (type);
5073 else
5074 t = type;
5076 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5077 make_tree (t, XEXP (x, 0)),
5078 make_tree (t, XEXP (x, 1))));
5079 case UDIV:
5080 t = unsigned_type_for (type);
5081 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5082 make_tree (t, XEXP (x, 0)),
5083 make_tree (t, XEXP (x, 1))));
5085 case SIGN_EXTEND:
5086 case ZERO_EXTEND:
5087 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5088 GET_CODE (x) == ZERO_EXTEND);
5089 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5091 case CONST:
5092 return make_tree (type, XEXP (x, 0));
5094 case SYMBOL_REF:
5095 t = SYMBOL_REF_DECL (x);
5096 if (t)
5097 return fold_convert (type, build_fold_addr_expr (t));
5098 /* else fall through. */
5100 default:
5101 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5103 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5104 address mode to pointer mode. */
5105 if (POINTER_TYPE_P (type))
5106 x = convert_memory_address_addr_space
5107 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5109 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5110 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5111 t->decl_with_rtl.rtl = x;
5113 return t;
5117 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5118 and returning TARGET.
5120 If TARGET is 0, a pseudo-register or constant is returned. */
5123 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5125 rtx tem = 0;
5127 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5128 tem = simplify_binary_operation (AND, mode, op0, op1);
5129 if (tem == 0)
5130 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5132 if (target == 0)
5133 target = tem;
5134 else if (tem != target)
5135 emit_move_insn (target, tem);
5136 return target;
5139 /* Helper function for emit_store_flag. */
5141 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5142 machine_mode mode, machine_mode compare_mode,
5143 int unsignedp, rtx x, rtx y, int normalizep,
5144 machine_mode target_mode)
5146 struct expand_operand ops[4];
5147 rtx op0, comparison, subtarget;
5148 rtx_insn *last;
5149 machine_mode result_mode = targetm.cstore_mode (icode);
5151 last = get_last_insn ();
5152 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5153 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5154 if (!x || !y)
5156 delete_insns_since (last);
5157 return NULL_RTX;
5160 if (target_mode == VOIDmode)
5161 target_mode = result_mode;
5162 if (!target)
5163 target = gen_reg_rtx (target_mode);
5165 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5167 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5168 create_fixed_operand (&ops[1], comparison);
5169 create_fixed_operand (&ops[2], x);
5170 create_fixed_operand (&ops[3], y);
5171 if (!maybe_expand_insn (icode, 4, ops))
5173 delete_insns_since (last);
5174 return NULL_RTX;
5176 subtarget = ops[0].value;
5178 /* If we are converting to a wider mode, first convert to
5179 TARGET_MODE, then normalize. This produces better combining
5180 opportunities on machines that have a SIGN_EXTRACT when we are
5181 testing a single bit. This mostly benefits the 68k.
5183 If STORE_FLAG_VALUE does not have the sign bit set when
5184 interpreted in MODE, we can do this conversion as unsigned, which
5185 is usually more efficient. */
5186 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5188 convert_move (target, subtarget,
5189 val_signbit_known_clear_p (result_mode,
5190 STORE_FLAG_VALUE));
5191 op0 = target;
5192 result_mode = target_mode;
5194 else
5195 op0 = subtarget;
5197 /* If we want to keep subexpressions around, don't reuse our last
5198 target. */
5199 if (optimize)
5200 subtarget = 0;
5202 /* Now normalize to the proper value in MODE. Sometimes we don't
5203 have to do anything. */
5204 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5206 /* STORE_FLAG_VALUE might be the most negative number, so write
5207 the comparison this way to avoid a compile-time warning. */
5208 else if (- normalizep == STORE_FLAG_VALUE)
5209 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5211 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5212 it hard to use a value of just the sign bit due to ANSI integer
5213 constant typing rules. */
5214 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5215 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5216 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5217 normalizep == 1);
5218 else
5220 gcc_assert (STORE_FLAG_VALUE & 1);
5222 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5223 if (normalizep == -1)
5224 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5227 /* If we were converting to a smaller mode, do the conversion now. */
5228 if (target_mode != result_mode)
5230 convert_move (target, op0, 0);
5231 return target;
5233 else
5234 return op0;
5238 /* A subroutine of emit_store_flag only including "tricks" that do not
5239 need a recursive call. These are kept separate to avoid infinite
5240 loops. */
5242 static rtx
5243 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5244 machine_mode mode, int unsignedp, int normalizep,
5245 machine_mode target_mode)
5247 rtx subtarget;
5248 enum insn_code icode;
5249 machine_mode compare_mode;
5250 enum mode_class mclass;
5251 enum rtx_code scode;
5253 if (unsignedp)
5254 code = unsigned_condition (code);
5255 scode = swap_condition (code);
5257 /* If one operand is constant, make it the second one. Only do this
5258 if the other operand is not constant as well. */
5260 if (swap_commutative_operands_p (op0, op1))
5262 std::swap (op0, op1);
5263 code = swap_condition (code);
5266 if (mode == VOIDmode)
5267 mode = GET_MODE (op0);
5269 /* For some comparisons with 1 and -1, we can convert this to
5270 comparisons with zero. This will often produce more opportunities for
5271 store-flag insns. */
5273 switch (code)
5275 case LT:
5276 if (op1 == const1_rtx)
5277 op1 = const0_rtx, code = LE;
5278 break;
5279 case LE:
5280 if (op1 == constm1_rtx)
5281 op1 = const0_rtx, code = LT;
5282 break;
5283 case GE:
5284 if (op1 == const1_rtx)
5285 op1 = const0_rtx, code = GT;
5286 break;
5287 case GT:
5288 if (op1 == constm1_rtx)
5289 op1 = const0_rtx, code = GE;
5290 break;
5291 case GEU:
5292 if (op1 == const1_rtx)
5293 op1 = const0_rtx, code = NE;
5294 break;
5295 case LTU:
5296 if (op1 == const1_rtx)
5297 op1 = const0_rtx, code = EQ;
5298 break;
5299 default:
5300 break;
5303 /* If we are comparing a double-word integer with zero or -1, we can
5304 convert the comparison into one involving a single word. */
5305 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5306 && GET_MODE_CLASS (mode) == MODE_INT
5307 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5309 rtx tem;
5310 if ((code == EQ || code == NE)
5311 && (op1 == const0_rtx || op1 == constm1_rtx))
5313 rtx op00, op01;
5315 /* Do a logical OR or AND of the two words and compare the
5316 result. */
5317 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5318 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5319 tem = expand_binop (word_mode,
5320 op1 == const0_rtx ? ior_optab : and_optab,
5321 op00, op01, NULL_RTX, unsignedp,
5322 OPTAB_DIRECT);
5324 if (tem != 0)
5325 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5326 unsignedp, normalizep);
5328 else if ((code == LT || code == GE) && op1 == const0_rtx)
5330 rtx op0h;
5332 /* If testing the sign bit, can just test on high word. */
5333 op0h = simplify_gen_subreg (word_mode, op0, mode,
5334 subreg_highpart_offset (word_mode,
5335 mode));
5336 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5337 unsignedp, normalizep);
5339 else
5340 tem = NULL_RTX;
5342 if (tem)
5344 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5345 return tem;
5346 if (!target)
5347 target = gen_reg_rtx (target_mode);
5349 convert_move (target, tem,
5350 !val_signbit_known_set_p (word_mode,
5351 (normalizep ? normalizep
5352 : STORE_FLAG_VALUE)));
5353 return target;
5357 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5358 complement of A (for GE) and shifting the sign bit to the low bit. */
5359 if (op1 == const0_rtx && (code == LT || code == GE)
5360 && GET_MODE_CLASS (mode) == MODE_INT
5361 && (normalizep || STORE_FLAG_VALUE == 1
5362 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5364 subtarget = target;
5366 if (!target)
5367 target_mode = mode;
5369 /* If the result is to be wider than OP0, it is best to convert it
5370 first. If it is to be narrower, it is *incorrect* to convert it
5371 first. */
5372 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5374 op0 = convert_modes (target_mode, mode, op0, 0);
5375 mode = target_mode;
5378 if (target_mode != mode)
5379 subtarget = 0;
5381 if (code == GE)
5382 op0 = expand_unop (mode, one_cmpl_optab, op0,
5383 ((STORE_FLAG_VALUE == 1 || normalizep)
5384 ? 0 : subtarget), 0);
5386 if (STORE_FLAG_VALUE == 1 || normalizep)
5387 /* If we are supposed to produce a 0/1 value, we want to do
5388 a logical shift from the sign bit to the low-order bit; for
5389 a -1/0 value, we do an arithmetic shift. */
5390 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5391 GET_MODE_BITSIZE (mode) - 1,
5392 subtarget, normalizep != -1);
5394 if (mode != target_mode)
5395 op0 = convert_modes (target_mode, mode, op0, 0);
5397 return op0;
5400 mclass = GET_MODE_CLASS (mode);
5401 for (compare_mode = mode; compare_mode != VOIDmode;
5402 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5404 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5405 icode = optab_handler (cstore_optab, optab_mode);
5406 if (icode != CODE_FOR_nothing)
5408 do_pending_stack_adjust ();
5409 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5410 unsignedp, op0, op1, normalizep, target_mode);
5411 if (tem)
5412 return tem;
5414 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5416 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5417 unsignedp, op1, op0, normalizep, target_mode);
5418 if (tem)
5419 return tem;
5421 break;
5425 return 0;
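/* A standalone sketch of the double-word reduction performed in
   emit_store_flag_1 above.  It is purely illustrative and not used in
   this file; splitting a 64-bit value into two assumed 32-bit words
   stands in for the word_mode subregs, and only the cases the code
   above handles (EQ/NE against 0 or -1, and the LT/GE sign test
   against 0) appear.  */

static inline int
sketch_doubleword_store_flag (unsigned long long x, enum rtx_code code,
			      long long op1)
{
  unsigned int lo = (unsigned int) x;
  unsigned int hi = (unsigned int) (x >> 32);

  switch (code)
    {
    case EQ:
    case NE:
      {
	/* Against zero, OR the two words: the result is zero iff both
	   are zero.  Against -1, AND them: the result is all-ones iff
	   both are all-ones.  */
	unsigned int tem = op1 == 0 ? (lo | hi) : (lo & hi);
	unsigned int want = op1 == 0 ? 0 : ~0U;
	int eq = tem == want;
	return code == EQ ? eq : !eq;
      }

    case LT:
      /* The sign of the whole value lives in the high word.  */
      return hi >> 31;
    case GE:
      return !(hi >> 31);

    default:
      gcc_unreachable ();
    }
}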
5428 /* Emit a store-flag instruction for comparison CODE on OP0 and OP1,
5429 storing the result in TARGET. Normally return TARGET.
5430 Return 0 if that cannot be done.
5432 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5433 it is VOIDmode, they cannot both be CONST_INT.
5435 UNSIGNEDP is for the case where we have to widen the operands
5436 to perform the operation. It says to use zero-extension.
5438 NORMALIZEP is 1 if we should convert the result to be either zero
5439 or one. NORMALIZEP is -1 if we should convert the result to be
5440 either zero or -1. If NORMALIZEP is zero, the result will be left
5441 "raw" out of the scc insn. */
5443 rtx
5444 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5445 machine_mode mode, int unsignedp, int normalizep)
5447 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5448 enum rtx_code rcode;
5449 rtx subtarget;
5450 rtx tem, trueval;
5451 rtx_insn *last;
5453 /* If we compare constants, we shouldn't use a store-flag operation,
5454 but a constant load. We can get there via the vanilla route that
5455 usually generates a compare-branch sequence, but will in this case
5456 fold the comparison to a constant, and thus elide the branch. */
5457 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5458 return NULL_RTX;
5460 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5461 target_mode);
5462 if (tem)
5463 return tem;
5465 /* If we reached here, we can't do this with a scc insn; however, there
5466 are some comparisons that can be done in other ways. Don't do any
5467 of these cases if branches are very cheap. */
5468 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5469 return 0;
5471 /* See what we need to return. We can only return a 1, -1, or the
5472 sign bit. */
5474 if (normalizep == 0)
5476 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5477 normalizep = STORE_FLAG_VALUE;
5479 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5481 else
5482 return 0;
5485 last = get_last_insn ();
5487 /* If optimizing, use different pseudo registers for each insn, instead
5488 of reusing the same pseudo. This leads to better CSE, but slows
5489 down the compiler, since there are more pseudos. */
5490 subtarget = (!optimize
5491 && (target_mode == mode)) ? target : NULL_RTX;
5492 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5494 /* For floating-point comparisons, try the reverse comparison or try
5495 changing the "orderedness" of the comparison. */
5496 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5498 enum rtx_code first_code;
5499 bool and_them;
5501 rcode = reverse_condition_maybe_unordered (code);
5502 if (can_compare_p (rcode, mode, ccp_store_flag)
5503 && (code == ORDERED || code == UNORDERED
5504 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5505 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5507 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5508 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5510 /* For the reverse comparison, use either an addition or a XOR. */
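	  /* Illustrative note on why an addition or an XOR suffices: the
	     reverse comparison computed below yields 0 when the original
	     condition holds and a known nonzero value when it does not.
	     In the want_add case (STORE_FLAG_VALUE == 1 with NORMALIZEP
	     == -1, or vice versa), adding NORMALIZEP maps {0, 1} to
	     {-1, 0} (or {0, -1} to {1, 0}), i.e. to the normalized value
	     for the original condition; otherwise, XORing with TRUEVAL
	     maps {0, TRUEVAL} to {TRUEVAL, 0}.  */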
5511 if (want_add
5512 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5513 optimize_insn_for_speed_p ()) == 0)
5515 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5516 STORE_FLAG_VALUE, target_mode);
5517 if (tem)
5518 return expand_binop (target_mode, add_optab, tem,
5519 gen_int_mode (normalizep, target_mode),
5520 target, 0, OPTAB_WIDEN);
5522 else if (!want_add
5523 && rtx_cost (trueval, XOR, 1,
5524 optimize_insn_for_speed_p ()) == 0)
5526 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5527 normalizep, target_mode);
5528 if (tem)
5529 return expand_binop (target_mode, xor_optab, tem, trueval,
5530 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5534 delete_insns_since (last);
5536 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5537 if (code == ORDERED || code == UNORDERED)
5538 return 0;
5540 and_them = split_comparison (code, mode, &first_code, &code);
5542 /* If there are no NaNs, the first comparison should always fall through.
5543 Effectively change the comparison to the other one. */
5544 if (!HONOR_NANS (mode))
5546 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5547 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5548 target_mode);
5551 if (!HAVE_conditional_move)
5552 return 0;
5554 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5555 conditional move. */
5556 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5557 normalizep, target_mode);
5558 if (tem == 0)
5559 return 0;
5561 if (and_them)
5562 tem = emit_conditional_move (target, code, op0, op1, mode,
5563 tem, const0_rtx, GET_MODE (tem), 0);
5564 else
5565 tem = emit_conditional_move (target, code, op0, op1, mode,
5566 trueval, tem, GET_MODE (tem), 0);
5568 if (tem == 0)
5569 delete_insns_since (last);
5570 return tem;
5573 /* The remaining tricks only apply to integer comparisons. */
5575 if (GET_MODE_CLASS (mode) != MODE_INT)
5576 return 0;
5578 /* If this is an equality comparison of integers, we can try to exclusive-or
5579 (or subtract) the two operands and use a recursive call to try the
5580 comparison with zero. Don't do any of these cases if branches are
5581 very cheap. */
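  /* That is, "a == b" is rewritten as "(a ^ b) == 0" (or, failing an
     XOR pattern, "(a - b) == 0"), so the comparison against zero can
     reuse the cheaper tricks handled elsewhere in this function
     (illustrative note).  */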
5583 if ((code == EQ || code == NE) && op1 != const0_rtx)
5585 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5586 OPTAB_WIDEN);
5588 if (tem == 0)
5589 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5590 OPTAB_WIDEN);
5591 if (tem != 0)
5592 tem = emit_store_flag (target, code, tem, const0_rtx,
5593 mode, unsignedp, normalizep);
5594 if (tem != 0)
5595 return tem;
5597 delete_insns_since (last);
5600 /* For integer comparisons, try the reverse comparison. However, for
5601 small X, if we'd have to extend anyway, implementing "X != 0"
5602 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5603 rcode = reverse_condition (code);
5604 if (can_compare_p (rcode, mode, ccp_store_flag)
5605 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5606 && code == NE
5607 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5608 && op1 == const0_rtx))
5610 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5611 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5613 /* Again, for the reverse comparison, use either an addition or a XOR. */
5614 if (want_add
5615 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5616 optimize_insn_for_speed_p ()) == 0)
5618 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5619 STORE_FLAG_VALUE, target_mode);
5620 if (tem != 0)
5621 tem = expand_binop (target_mode, add_optab, tem,
5622 gen_int_mode (normalizep, target_mode),
5623 target, 0, OPTAB_WIDEN);
5625 else if (!want_add
5626 && rtx_cost (trueval, XOR, 1,
5627 optimize_insn_for_speed_p ()) == 0)
5629 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5630 normalizep, target_mode);
5631 if (tem != 0)
5632 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5633 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5636 if (tem != 0)
5637 return tem;
5638 delete_insns_since (last);
5641 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5642 the constant zero. Reject all other comparisons at this point. Only
5643 do LE and GT if branches are expensive, since the sequences below for
5644 them are themselves expensive on 2-operand machines. */
5646 if (op1 != const0_rtx
5647 || (code != EQ && code != NE
5648 && (BRANCH_COST (optimize_insn_for_speed_p (),
5649 false) <= 1 || (code != LE && code != GT))))
5650 return 0;
5652 /* Try to put the result of the comparison in the sign bit; a standalone
5653 sketch of these sign-bit tricks follows this function. Assume we can't do the necessary operation below. */
5655 tem = 0;
5657 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5658 the sign bit set. */
5660 if (code == LE)
5662 /* This is destructive, so SUBTARGET can't be OP0. */
5663 if (rtx_equal_p (subtarget, op0))
5664 subtarget = 0;
5666 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5667 OPTAB_WIDEN);
5668 if (tem)
5669 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5670 OPTAB_WIDEN);
5673 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5674 number of bits in the mode of OP0, minus one. */
5676 if (code == GT)
5678 if (rtx_equal_p (subtarget, op0))
5679 subtarget = 0;
5681 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5682 GET_MODE_BITSIZE (mode) - 1,
5683 subtarget, 0);
5684 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5685 OPTAB_WIDEN);
5688 if (code == EQ || code == NE)
5690 /* For EQ or NE, one way to do the comparison is to apply an operation
5691 that converts the operand into a positive number if it is nonzero
5692 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5693 for NE we negate. This puts the result in the sign bit. Then we
5694 normalize with a shift, if needed.
5696 Two operations that can do the above actions are ABS and FFS, so try
5697 them. If that doesn't work, and MODE is smaller than a full word,
5698 we can use zero-extension to the wider mode (an unsigned conversion)
5699 as the operation. */
5701 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5702 that is compensated by the subsequent overflow when subtracting
5703 one / negating. */
5705 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5706 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5707 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5708 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5709 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5711 tem = convert_modes (word_mode, mode, op0, 1);
5712 mode = word_mode;
5715 if (tem != 0)
5717 if (code == EQ)
5718 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5719 0, OPTAB_WIDEN);
5720 else
5721 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5724 /* If we couldn't do it that way, for NE we can "or" the two's complement
5725 of the value with itself. For EQ, we take the one's complement of
5726 that "or", which is an extra insn, so we only handle EQ if branches
5727 are expensive. */
5729 if (tem == 0
5730 && (code == NE
5731 || BRANCH_COST (optimize_insn_for_speed_p (),
5732 false) > 1))
5734 if (rtx_equal_p (subtarget, op0))
5735 subtarget = 0;
5737 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5738 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5739 OPTAB_WIDEN);
5741 if (tem && code == EQ)
5742 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5746 if (tem && normalizep)
5747 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5748 GET_MODE_BITSIZE (mode) - 1,
5749 subtarget, normalizep == 1);
5751 if (tem)
5753 if (!target)
5755 else if (GET_MODE (tem) != target_mode)
5757 convert_move (target, tem, 0);
5758 tem = target;
5760 else if (!subtarget)
5762 emit_move_insn (target, tem);
5763 tem = target;
5766 else
5767 delete_insns_since (last);
5769 return tem;
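/* A standalone sketch of the sign-bit tricks used by emit_store_flag
   above for comparisons against zero.  It is purely illustrative and
   not used in this file; the plain "int" operand, the unsigned
   arithmetic (standing in for the wrapping rtl operations), the
   assumption of an arithmetic ">>" on negative values, and the final
   logical shift that normalizes to 0/1 are all simplifications.  */

static inline int
sketch_store_flag_sign_bit (int a, enum rtx_code code)
{
  unsigned int ua = (unsigned int) a;
  unsigned int bits = sizeof (int) * CHAR_BIT;
  unsigned int tem;

  switch (code)
    {
    case LE:
      /* A <= 0 iff (A | (A - 1)) has the sign bit set.  */
      tem = ua | (ua - 1);
      break;
    case GT:
      /* A > 0 iff ((A >> (BITS - 1)) - A) has the sign bit set, where
	 the shift is an arithmetic one.  */
      tem = (unsigned int) (a >> (bits - 1)) - ua;
      break;
    case NE:
      /* A != 0 iff (-A | A) has the sign bit set.  */
      tem = (0U - ua) | ua;
      break;
    case EQ:
      /* A == 0 iff the complement of (-A | A) has the sign bit set.  */
      tem = ~((0U - ua) | ua);
      break;
    default:
      gcc_unreachable ();
    }

  /* Normalize: a logical shift of the sign bit down gives 0/1; an
     arithmetic shift would give 0/-1 instead.  */
  return tem >> (bits - 1);
}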
5772 /* Like emit_store_flag, but always succeeds. */
5774 rtx
5775 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5776 machine_mode mode, int unsignedp, int normalizep)
5778 rtx tem;
5779 rtx_code_label *label;
5780 rtx trueval, falseval;
5782 /* First see if emit_store_flag can do the job. */
5783 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5784 if (tem != 0)
5785 return tem;
5787 if (!target)
5788 target = gen_reg_rtx (word_mode);
5790 /* If this failed, we have to do this with set/compare/jump/set code;
5791 a sketch of that pattern follows this function. For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5792 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5793 if (code == NE
5794 && GET_MODE_CLASS (mode) == MODE_INT
5795 && REG_P (target)
5796 && op0 == target
5797 && op1 == const0_rtx)
5799 label = gen_label_rtx ();
5800 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5801 NULL_RTX, NULL, label, -1);
5802 emit_move_insn (target, trueval);
5803 emit_label (label);
5804 return target;
5807 if (!REG_P (target)
5808 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5809 target = gen_reg_rtx (GET_MODE (target));
5811 /* Jump in the right direction if the target cannot implement CODE
5812 but can jump on its reverse condition. */
5813 falseval = const0_rtx;
5814 if (! can_compare_p (code, mode, ccp_jump)
5815 && (! FLOAT_MODE_P (mode)
5816 || code == ORDERED || code == UNORDERED
5817 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5818 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5820 enum rtx_code rcode;
5821 if (FLOAT_MODE_P (mode))
5822 rcode = reverse_condition_maybe_unordered (code);
5823 else
5824 rcode = reverse_condition (code);
5826 /* Canonicalize to UNORDERED for the libcall. */
5827 if (can_compare_p (rcode, mode, ccp_jump)
5828 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5830 falseval = trueval;
5831 trueval = const0_rtx;
5832 code = rcode;
5836 emit_move_insn (target, trueval);
5837 label = gen_label_rtx ();
5838 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5839 label, -1);
5841 emit_move_insn (target, falseval);
5842 emit_label (label);
5844 return target;
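/* A standalone sketch of the fallback sequence emitted by
   emit_store_flag_force above, written as ordinary C.  It is purely
   illustrative and not used in this file: "<" stands for the arbitrary
   comparison CODE, and the 1/0 constants stand for TRUEVAL and
   FALSEVAL (which the code above swaps, together with reversing the
   condition, when the target can only jump on the reverse condition).  */

static inline int
sketch_store_flag_force (int op0, int op1)
{
  int target;

  /* Load the "true" value, then branch around the "false" value when
     the comparison holds: move / compare+jump / move / label.  */
  target = 1;
  if (op0 < op1)
    goto done;
  target = 0;
 done:
  return target;
}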
5847 /* Perform possibly multi-word comparison and conditional jump to LABEL
5848 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5849 now a thin wrapper around do_compare_rtx_and_jump. */
5851 static void
5852 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5853 rtx_code_label *label)
5855 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5856 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
5857 NULL, label, -1);