gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2015 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "diagnostic-core.h"
  27 #include "rtl.h"
  28 #include "hash-set.h"
  29 #include "machmode.h"
  30 #include "vec.h"
  31 #include "double-int.h"
  32 #include "input.h"
  33 #include "alias.h"
  34 #include "symtab.h"
  35 #include "wide-int.h"
  36 #include "inchash.h"
  37 #include "tree.h"
  38 #include "fold-const.h"
  39 #include "stor-layout.h"
  40 #include "tm_p.h"
  41 #include "flags.h"
  42 #include "insn-config.h"
  43 #include "hashtab.h"
  44 #include "hard-reg-set.h"
  45 #include "function.h"
  46 #include "statistics.h"
  47 #include "real.h"
  48 #include "fixed-value.h"
  49 #include "expmed.h"
  50 #include "dojump.h"
  51 #include "explow.h"
  52 #include "calls.h"
  53 #include "emit-rtl.h"
  54 #include "varasm.h"
  55 #include "stmt.h"
  56 #include "expr.h"
  57 #include "insn-codes.h"
  58 #include "optabs.h"
  59 #include "recog.h"
  60 #include "langhooks.h"
  61 #include "predict.h"
  62 #include "basic-block.h"
  63 #include "df.h"
  64 #include "target.h"
  65
  66 struct target_expmed default_target_expmed;
  67 #if SWITCHABLE_TARGET
  68 struct target_expmed *this_target_expmed = &default_target_expmed;
  69 #endif
  70
  71 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  72                                    unsigned HOST_WIDE_INT,
  73                                    unsigned HOST_WIDE_INT,
  74                                    unsigned HOST_WIDE_INT,
  75                                    rtx);
  76 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
  77                                      unsigned HOST_WIDE_INT,
  78                                      rtx);
  79 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  80                                    unsigned HOST_WIDE_INT,
  81                                    unsigned HOST_WIDE_INT,
  82                                    unsigned HOST_WIDE_INT,
  83                                    rtx);
  84 static rtx extract_fixed_bit_field (machine_mode, rtx,
  85                                     unsigned HOST_WIDE_INT,
  86                                     unsigned HOST_WIDE_INT, rtx, int);
  87 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
  88                                       unsigned HOST_WIDE_INT,
  89                                       unsigned HOST_WIDE_INT, rtx, int);
  90 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
  91 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  92                                     unsigned HOST_WIDE_INT, int);
  93 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
  94 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  95 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  96
  97 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  98    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  99    The mask is truncated if necessary to the width of mode MODE.  The
 100    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
 101
 102 static inline rtx
 103 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
 104 {
 105   return immed_wide_int_const
 106     (wi::shifted_mask (bitpos, bitsize, complement,
 107                        GET_MODE_PRECISION (mode)), mode);
 108 }
 109
 110 /* Test whether a value is zero of a power of two.  */
 111 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
 112   (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
 113
 114 struct init_expmed_rtl
 115 {
 116   rtx reg;
 117   rtx plus;
 118   rtx neg;
 119   rtx mult;
 120   rtx sdiv;
 121   rtx udiv;
 122   rtx sdiv_32;
 123   rtx smod_32;
 124   rtx wide_mult;
 125   rtx wide_lshr;
 126   rtx wide_trunc;
 127   rtx shift;
 128   rtx shift_mult;
 129   rtx shift_add;
 130   rtx shift_sub0;
 131   rtx shift_sub1;
 132   rtx zext;
 133   rtx trunc;
 134
 135   rtx pow2[MAX_BITS_PER_WORD];
 136   rtx cint[MAX_BITS_PER_WORD];
 137 };
 138
 139 static void
 140 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
 141                       machine_mode from_mode, bool speed)
 142 {
 143   int to_size, from_size;
 144   rtx which;
 145
 146   to_size = GET_MODE_PRECISION (to_mode);
 147   from_size = GET_MODE_PRECISION (from_mode);
 148
 149   /* Most partial integers have a precision less than the "full"
 150      integer it requires for storage.  In case one doesn't, for
 151      comparison purposes here, reduce the bit size by one in that
 152      case.  */
 153   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 154       && exact_log2 (to_size) != -1)
 155     to_size --;
 156   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 157       && exact_log2 (from_size) != -1)
 158     from_size --;
 159
 160   /* Assume cost of zero-extend and sign-extend is the same.  */
 161   which = (to_size < from_size ? all->trunc : all->zext);
 162
 163   PUT_MODE (all->reg, from_mode);
 164   set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
 165 }
 166
 167 static void
 168 init_expmed_one_mode (struct init_expmed_rtl *all,
 169                       machine_mode mode, int speed)
 170 {
 171   int m, n, mode_bitsize;
 172   machine_mode mode_from;
 173
 174   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 175
 176   PUT_MODE (all->reg, mode);
 177   PUT_MODE (all->plus, mode);
 178   PUT_MODE (all->neg, mode);
 179   PUT_MODE (all->mult, mode);
 180   PUT_MODE (all->sdiv, mode);
 181   PUT_MODE (all->udiv, mode);
 182   PUT_MODE (all->sdiv_32, mode);
 183   PUT_MODE (all->smod_32, mode);
 184   PUT_MODE (all->wide_trunc, mode);
 185   PUT_MODE (all->shift, mode);
 186   PUT_MODE (all->shift_mult, mode);
 187   PUT_MODE (all->shift_add, mode);
 188   PUT_MODE (all->shift_sub0, mode);
 189   PUT_MODE (all->shift_sub1, mode);
 190   PUT_MODE (all->zext, mode);
 191   PUT_MODE (all->trunc, mode);
 192
 193   set_add_cost (speed, mode, set_src_cost (all->plus, speed));
 194   set_neg_cost (speed, mode, set_src_cost (all->neg, speed));
 195   set_mul_cost (speed, mode, set_src_cost (all->mult, speed));
 196   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, speed));
 197   set_udiv_cost (speed, mode, set_src_cost (all->udiv, speed));
 198
 199   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, speed)
 200                                      <= 2 * add_cost (speed, mode)));
 201   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, speed)
 202                                      <= 4 * add_cost (speed, mode)));
 203
 204   set_shift_cost (speed, mode, 0, 0);
 205   {
 206     int cost = add_cost (speed, mode);
 207     set_shiftadd_cost (speed, mode, 0, cost);
 208     set_shiftsub0_cost (speed, mode, 0, cost);
 209     set_shiftsub1_cost (speed, mode, 0, cost);
 210   }
 211
 212   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 213   for (m = 1; m < n; m++)
 214     {
 215       XEXP (all->shift, 1) = all->cint[m];
 216       XEXP (all->shift_mult, 1) = all->pow2[m];
 217
 218       set_shift_cost (speed, mode, m, set_src_cost (all->shift, speed));
 219       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, speed));
 220       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, speed));
 221       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, speed));
 222     }
 223
 224   if (SCALAR_INT_MODE_P (mode))
 225     {
 226       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 227            mode_from = (machine_mode)(mode_from + 1))
 228         init_expmed_one_conv (all, mode, mode_from, speed);
 229     }
 230   if (GET_MODE_CLASS (mode) == MODE_INT)
 231     {
 232       machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
 233       if (wider_mode != VOIDmode)
 234         {
 235           PUT_MODE (all->zext, wider_mode);
 236           PUT_MODE (all->wide_mult, wider_mode);
 237           PUT_MODE (all->wide_lshr, wider_mode);
 238           XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
 239
 240           set_mul_widen_cost (speed, wider_mode,
 241                               set_src_cost (all->wide_mult, speed));
 242           set_mul_highpart_cost (speed, mode,
 243                                  set_src_cost (all->wide_trunc, speed));
 244         }
 245     }
 246 }
 247
 248 void
 249 init_expmed (void)
 250 {
 251   struct init_expmed_rtl all;
 252   machine_mode mode = QImode;
 253   int m, speed;
 254
 255   memset (&all, 0, sizeof all);
 256   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 257     {
 258       all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 259       all.cint[m] = GEN_INT (m);
 260     }
 261
 262   /* Avoid using hard regs in ways which may be unsupported.  */
 263   all.reg = gen_rtx_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 264   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 265   all.neg = gen_rtx_NEG (mode, all.reg);
 266   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 267   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 268   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 269   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 270   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 271   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 272   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 273   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 274   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 275   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 276   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 277   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 278   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 279   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 280   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 281
 282   for (speed = 0; speed < 2; speed++)
 283     {
 284       crtl->maybe_hot_insn_p = speed;
 285       set_zero_cost (speed, set_src_cost (const0_rtx, speed));
 286
 287       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 288            mode = (machine_mode)(mode + 1))
 289         init_expmed_one_mode (&all, mode, speed);
 290
 291       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 292         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 293              mode = (machine_mode)(mode + 1))
 294           init_expmed_one_mode (&all, mode, speed);
 295
 296       if (MIN_MODE_VECTOR_INT != VOIDmode)
 297         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 298              mode = (machine_mode)(mode + 1))
 299           init_expmed_one_mode (&all, mode, speed);
 300     }
 301
 302   if (alg_hash_used_p ())
 303     {
 304       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 305       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 306     }
 307   else
 308     set_alg_hash_used_p (true);
 309   default_rtl_profile ();
 310
 311   ggc_free (all.trunc);
 312   ggc_free (all.shift_sub1);
 313   ggc_free (all.shift_sub0);
 314   ggc_free (all.shift_add);
 315   ggc_free (all.shift_mult);
 316   ggc_free (all.shift);
 317   ggc_free (all.wide_trunc);
 318   ggc_free (all.wide_lshr);
 319   ggc_free (all.wide_mult);
 320   ggc_free (all.zext);
 321   ggc_free (all.smod_32);
 322   ggc_free (all.sdiv_32);
 323   ggc_free (all.udiv);
 324   ggc_free (all.sdiv);
 325   ggc_free (all.mult);
 326   ggc_free (all.neg);
 327   ggc_free (all.plus);
 328   ggc_free (all.reg);
 329 }
 330
 331 /* Return an rtx representing minus the value of X.
 332    MODE is the intended mode of the result,
 333    useful if X is a CONST_INT.  */
 334
 335 rtx
 336 negate_rtx (machine_mode mode, rtx x)
 337 {
 338   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 339
 340   if (result == 0)
 341     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 342
 343   return result;
 344 }
 345
 346 /* Adjust bitfield memory MEM so that it points to the first unit of mode
 347    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
 348    If MODE is BLKmode, return a reference to every byte in the bitfield.
 349    Set *NEW_BITNUM to the bit position of the field within the new memory.  */
 350
 351 static rtx
 352 narrow_bit_field_mem (rtx mem, machine_mode mode,
 353                       unsigned HOST_WIDE_INT bitsize,
 354                       unsigned HOST_WIDE_INT bitnum,
 355                       unsigned HOST_WIDE_INT *new_bitnum)
 356 {
 357   if (mode == BLKmode)
 358     {
 359       *new_bitnum = bitnum % BITS_PER_UNIT;
 360       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 361       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 362                             / BITS_PER_UNIT);
 363       return adjust_bitfield_address_size (mem, mode, offset, size);
 364     }
 365   else
 366     {
 367       unsigned int unit = GET_MODE_BITSIZE (mode);
 368       *new_bitnum = bitnum % unit;
 369       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 370       return adjust_bitfield_address (mem, mode, offset);
 371     }
 372 }
 373
 374 /* The caller wants to perform insertion or extraction PATTERN on a
 375    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 376    BITREGION_START and BITREGION_END are as for store_bit_field
 377    and FIELDMODE is the natural mode of the field.
 378
 379    Search for a mode that is compatible with the memory access
 380    restrictions and (where applicable) with a register insertion or
 381    extraction.  Return the new memory on success, storing the adjusted
 382    bit position in *NEW_BITNUM.  Return null otherwise.  */
 383
 384 static rtx
 385 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 386                               rtx op0, HOST_WIDE_INT bitsize,
 387                               HOST_WIDE_INT bitnum,
 388                               unsigned HOST_WIDE_INT bitregion_start,
 389                               unsigned HOST_WIDE_INT bitregion_end,
 390                               machine_mode fieldmode,
 391                               unsigned HOST_WIDE_INT *new_bitnum)
 392 {
 393   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 394                                 bitregion_end, MEM_ALIGN (op0),
 395                                 MEM_VOLATILE_P (op0));
 396   machine_mode best_mode;
 397   if (iter.next_mode (&best_mode))
 398     {
 399       /* We can use a memory in BEST_MODE.  See whether this is true for
 400          any wider modes.  All other things being equal, we prefer to
 401          use the widest mode possible because it tends to expose more
 402          CSE opportunities.  */
 403       if (!iter.prefer_smaller_modes ())
 404         {
 405           /* Limit the search to the mode required by the corresponding
 406              register insertion or extraction instruction, if any.  */
 407           machine_mode limit_mode = word_mode;
 408           extraction_insn insn;
 409           if (get_best_reg_extraction_insn (&insn, pattern,
 410                                             GET_MODE_BITSIZE (best_mode),
 411                                             fieldmode))
 412             limit_mode = insn.field_mode;
 413
 414           machine_mode wider_mode;
 415           while (iter.next_mode (&wider_mode)
 416                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 417             best_mode = wider_mode;
 418         }
 419       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 420                                    new_bitnum);
 421     }
 422   return NULL_RTX;
 423 }
 424
 425 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 426    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 427    offset is then BITNUM / BITS_PER_UNIT.  */
 428
 429 static bool
 430 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
 431                      unsigned HOST_WIDE_INT bitsize,
 432                      machine_mode struct_mode)
 433 {
 434   if (BYTES_BIG_ENDIAN)
 435     return (bitnum % BITS_PER_UNIT == 0
 436             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
 437                 || (bitnum + bitsize) % BITS_PER_WORD == 0));
 438   else
 439     return bitnum % BITS_PER_WORD == 0;
 440 }
 441
 442 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 443    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 444    Return false if the access would touch memory outside the range
 445    BITREGION_START to BITREGION_END for conformance to the C++ memory
 446    model.  */
 447
 448 static bool
 449 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 450                             unsigned HOST_WIDE_INT bitnum,
 451                             machine_mode fieldmode,
 452                             unsigned HOST_WIDE_INT bitregion_start,
 453                             unsigned HOST_WIDE_INT bitregion_end)
 454 {
 455   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 456
 457   /* -fstrict-volatile-bitfields must be enabled and we must have a
 458      volatile MEM.  */
 459   if (!MEM_P (op0)
 460       || !MEM_VOLATILE_P (op0)
 461       || flag_strict_volatile_bitfields <= 0)
 462     return false;
 463
 464   /* Non-integral modes likely only happen with packed structures.
 465      Punt.  */
 466   if (!SCALAR_INT_MODE_P (fieldmode))
 467     return false;
 468
 469   /* The bit size must not be larger than the field mode, and
 470      the field mode must not be larger than a word.  */
 471   if (bitsize > modesize || modesize > BITS_PER_WORD)
 472     return false;
 473
 474   /* Check for cases of unaligned fields that must be split.  */
 475   if (bitnum % BITS_PER_UNIT + bitsize > modesize
 476       || (STRICT_ALIGNMENT
 477           && bitnum % GET_MODE_ALIGNMENT (fieldmode) + bitsize > modesize))
 478     return false;
 479
 480   /* Check for cases where the C++ memory model applies.  */
 481   if (bitregion_end != 0
 482       && (bitnum - bitnum % modesize < bitregion_start
 483           || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
 484     return false;
 485
 486   return true;
 487 }
 488
 489 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 490    bit number BITNUM can be treated as a simple value of mode MODE.  */
 491
 492 static bool
 493 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 494                        unsigned HOST_WIDE_INT bitnum, machine_mode mode)
 495 {
 496   return (MEM_P (op0)
 497           && bitnum % BITS_PER_UNIT == 0
 498           && bitsize == GET_MODE_BITSIZE (mode)
 499           && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
 500               || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
 501                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 502 }
 503 \f
 504 /* Try to use instruction INSV to store VALUE into a field of OP0.
 505    BITSIZE and BITNUM are as for store_bit_field.  */
 506
 507 static bool
 508 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 509                             unsigned HOST_WIDE_INT bitsize,
 510                             unsigned HOST_WIDE_INT bitnum,
 511                             rtx value)
 512 {
 513   struct expand_operand ops[4];
 514   rtx value1;
 515   rtx xop0 = op0;
 516   rtx_insn *last = get_last_insn ();
 517   bool copy_back = false;
 518
 519   machine_mode op_mode = insv->field_mode;
 520   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 521   if (bitsize == 0 || bitsize > unit)
 522     return false;
 523
 524   if (MEM_P (xop0))
 525     /* Get a reference to the first byte of the field.  */
 526     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 527                                  &bitnum);
 528   else
 529     {
 530       /* Convert from counting within OP0 to counting in OP_MODE.  */
 531       if (BYTES_BIG_ENDIAN)
 532         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 533
 534       /* If xop0 is a register, we need it in OP_MODE
 535          to make it acceptable to the format of insv.  */
 536       if (GET_CODE (xop0) == SUBREG)
 537         /* We can't just change the mode, because this might clobber op0,
 538            and we will need the original value of op0 if insv fails.  */
 539         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 540       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 541         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 542     }
 543
 544   /* If the destination is a paradoxical subreg such that we need a
 545      truncate to the inner mode, perform the insertion on a temporary and
 546      truncate the result to the original destination.  Note that we can't
 547      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 548      X) 0)) is (reg:N X).  */
 549   if (GET_CODE (xop0) == SUBREG
 550       && REG_P (SUBREG_REG (xop0))
 551       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 552                                          op_mode))
 553     {
 554       rtx tem = gen_reg_rtx (op_mode);
 555       emit_move_insn (tem, xop0);
 556       xop0 = tem;
 557       copy_back = true;
 558     }
 559
 560   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 561      "backwards" from the size of the unit we are inserting into.
 562      Otherwise, we count bits from the most significant on a
 563      BYTES/BITS_BIG_ENDIAN machine.  */
 564
 565   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 566     bitnum = unit - bitsize - bitnum;
 567
 568   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 569   value1 = value;
 570   if (GET_MODE (value) != op_mode)
 571     {
 572       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 573         {
 574           /* Optimization: Don't bother really extending VALUE
 575              if it has all the bits we will actually use.  However,
 576              if we must narrow it, be sure we do it correctly.  */
 577
 578           if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 579             {
 580               rtx tmp;
 581
 582               tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 583               if (! tmp)
 584                 tmp = simplify_gen_subreg (op_mode,
 585                                            force_reg (GET_MODE (value),
 586                                                       value1),
 587                                            GET_MODE (value), 0);
 588               value1 = tmp;
 589             }
 590           else
 591             value1 = gen_lowpart (op_mode, value1);
 592         }
 593       else if (CONST_INT_P (value))
 594         value1 = gen_int_mode (INTVAL (value), op_mode);
 595       else
 596         /* Parse phase is supposed to make VALUE's data type
 597            match that of the component reference, which is a type
 598            at least as wide as the field; so VALUE should have
 599            a mode that corresponds to that type.  */
 600         gcc_assert (CONSTANT_P (value));
 601     }
 602
 603   create_fixed_operand (&ops[0], xop0);
 604   create_integer_operand (&ops[1], bitsize);
 605   create_integer_operand (&ops[2], bitnum);
 606   create_input_operand (&ops[3], value1, op_mode);
 607   if (maybe_expand_insn (insv->icode, 4, ops))
 608     {
 609       if (copy_back)
 610         convert_move (op0, xop0, true);
 611       return true;
 612     }
 613   delete_insns_since (last);
 614   return false;
 615 }
 616
 617 /* A subroutine of store_bit_field, with the same arguments.  Return true
 618    if the operation could be implemented.
 619
 620    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 621    no other way of implementing the operation.  If FALLBACK_P is false,
 622    return false instead.  */
 623
 624 static bool
 625 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 626                    unsigned HOST_WIDE_INT bitnum,
 627                    unsigned HOST_WIDE_INT bitregion_start,
 628                    unsigned HOST_WIDE_INT bitregion_end,
 629                    machine_mode fieldmode,
 630                    rtx value, bool fallback_p)
 631 {
 632   rtx op0 = str_rtx;
 633   rtx orig_value;
 634
 635   while (GET_CODE (op0) == SUBREG)
 636     {
 637       /* The following line once was done only if WORDS_BIG_ENDIAN,
 638          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 639          meaningful at a much higher level; when structures are copied
 640          between memory and regs, the higher-numbered regs
 641          always get higher addresses.  */
 642       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 643       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 644       int byte_offset = 0;
 645
 646       /* Paradoxical subregs need special handling on big endian machines.  */
 647       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 648         {
 649           int difference = inner_mode_size - outer_mode_size;
 650
 651           if (WORDS_BIG_ENDIAN)
 652             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 653           if (BYTES_BIG_ENDIAN)
 654             byte_offset += difference % UNITS_PER_WORD;
 655         }
 656       else
 657         byte_offset = SUBREG_BYTE (op0);
 658
 659       bitnum += byte_offset * BITS_PER_UNIT;
 660       op0 = SUBREG_REG (op0);
 661     }
 662
 663   /* No action is needed if the target is a register and if the field
 664      lies completely outside that register.  This can occur if the source
 665      code contains an out-of-bounds access to a small array.  */
 666   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 667     return true;
 668
 669   /* Use vec_set patterns for inserting parts of vectors whenever
 670      available.  */
 671   if (VECTOR_MODE_P (GET_MODE (op0))
 672       && !MEM_P (op0)
 673       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 674       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 675       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 676       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 677     {
 678       struct expand_operand ops[3];
 679       machine_mode outermode = GET_MODE (op0);
 680       machine_mode innermode = GET_MODE_INNER (outermode);
 681       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 682       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 683
 684       create_fixed_operand (&ops[0], op0);
 685       create_input_operand (&ops[1], value, innermode);
 686       create_integer_operand (&ops[2], pos);
 687       if (maybe_expand_insn (icode, 3, ops))
 688         return true;
 689     }
 690
 691   /* If the target is a register, overwriting the entire object, or storing
 692      a full-word or multi-word field can be done with just a SUBREG.  */
 693   if (!MEM_P (op0)
 694       && bitsize == GET_MODE_BITSIZE (fieldmode)
 695       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 696           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 697     {
 698       /* Use the subreg machinery either to narrow OP0 to the required
 699          words or to cope with mode punning between equal-sized modes.
 700          In the latter case, use subreg on the rhs side, not lhs.  */
 701       rtx sub;
 702
 703       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 704         {
 705           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 706           if (sub)
 707             {
 708               emit_move_insn (op0, sub);
 709               return true;
 710             }
 711         }
 712       else
 713         {
 714           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 715                                      bitnum / BITS_PER_UNIT);
 716           if (sub)
 717             {
 718               emit_move_insn (sub, value);
 719               return true;
 720             }
 721         }
 722     }
 723
 724   /* If the target is memory, storing any naturally aligned field can be
 725      done with a simple store.  For targets that support fast unaligned
 726      memory, any naturally sized, unit aligned field can be done directly.  */
 727   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
 728     {
 729       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 730       emit_move_insn (op0, value);
 731       return true;
 732     }
 733
 734   /* Make sure we are playing with integral modes.  Pun with subregs
 735      if we aren't.  This must come after the entire register case above,
 736      since that case is valid for any mode.  The following cases are only
 737      valid for integral modes.  */
 738   {
 739     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 740     if (imode != GET_MODE (op0))
 741       {
 742         if (MEM_P (op0))
 743           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
 744         else
 745           {
 746             gcc_assert (imode != BLKmode);
 747             op0 = gen_lowpart (imode, op0);
 748           }
 749       }
 750   }
 751
 752   /* Storing an lsb-aligned field in a register
 753      can be done with a movstrict instruction.  */
 754
 755   if (!MEM_P (op0)
 756       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 757       && bitsize == GET_MODE_BITSIZE (fieldmode)
 758       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 759     {
 760       struct expand_operand ops[2];
 761       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 762       rtx arg0 = op0;
 763       unsigned HOST_WIDE_INT subreg_off;
 764
 765       if (GET_CODE (arg0) == SUBREG)
 766         {
 767           /* Else we've got some float mode source being extracted into
 768              a different float mode destination -- this combination of
 769              subregs results in Severe Tire Damage.  */
 770           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 771                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 772                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 773           arg0 = SUBREG_REG (arg0);
 774         }
 775
 776       subreg_off = bitnum / BITS_PER_UNIT;
 777       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 778         {
 779           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 780
 781           create_fixed_operand (&ops[0], arg0);
 782           /* Shrink the source operand to FIELDMODE.  */
 783           create_convert_operand_to (&ops[1], value, fieldmode, false);
 784           if (maybe_expand_insn (icode, 2, ops))
 785             return true;
 786         }
 787     }
 788
 789   /* Handle fields bigger than a word.  */
 790
 791   if (bitsize > BITS_PER_WORD)
 792     {
 793       /* Here we transfer the words of the field
 794          in the order least significant first.
 795          This is because the most significant word is the one which may
 796          be less than full.
 797          However, only do that if the value is not BLKmode.  */
 798
 799       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 800       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 801       unsigned int i;
 802       rtx_insn *last;
 803
 804       /* This is the mode we must force value to, so that there will be enough
 805          subwords to extract.  Note that fieldmode will often (always?) be
 806          VOIDmode, because that is what store_field uses to indicate that this
 807          is a bit field, but passing VOIDmode to operand_subword_force
 808          is not allowed.  */
 809       fieldmode = GET_MODE (value);
 810       if (fieldmode == VOIDmode)
 811         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 812
 813       last = get_last_insn ();
 814       for (i = 0; i < nwords; i++)
 815         {
 816           /* If I is 0, use the low-order word in both field and target;
 817              if I is 1, use the next to lowest word; and so on.  */
 818           unsigned int wordnum = (backwards
 819                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 820                                   - i - 1
 821                                   : i);
 822           unsigned int bit_offset = (backwards
 823                                      ? MAX ((int) bitsize - ((int) i + 1)
 824                                             * BITS_PER_WORD,
 825                                             0)
 826                                      : (int) i * BITS_PER_WORD);
 827           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 828           unsigned HOST_WIDE_INT new_bitsize =
 829             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 830
 831           /* If the remaining chunk doesn't have full wordsize we have
 832              to make sure that for big endian machines the higher order
 833              bits are used.  */
 834           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 835             value_word = simplify_expand_binop (word_mode, lshr_optab,
 836                                                 value_word,
 837                                                 GEN_INT (BITS_PER_WORD
 838                                                          - new_bitsize),
 839                                                 NULL_RTX, true,
 840                                                 OPTAB_LIB_WIDEN);
 841
 842           if (!store_bit_field_1 (op0, new_bitsize,
 843                                   bitnum + bit_offset,
 844                                   bitregion_start, bitregion_end,
 845                                   word_mode,
 846                                   value_word, fallback_p))
 847             {
 848               delete_insns_since (last);
 849               return false;
 850             }
 851         }
 852       return true;
 853     }
 854
 855   /* If VALUE has a floating-point or complex mode, access it as an
 856      integer of the corresponding size.  This can occur on a machine
 857      with 64 bit registers that uses SFmode for float.  It can also
 858      occur for unaligned float or complex fields.  */
 859   orig_value = value;
 860   if (GET_MODE (value) != VOIDmode
 861       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 862       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 863     {
 864       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 865       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 866     }
 867
 868   /* If OP0 is a multi-word register, narrow it to the affected word.
 869      If the region spans two words, defer to store_split_bit_field.  */
 870   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 871     {
 872       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 873                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 874       gcc_assert (op0);
 875       bitnum %= BITS_PER_WORD;
 876       if (bitnum + bitsize > BITS_PER_WORD)
 877         {
 878           if (!fallback_p)
 879             return false;
 880
 881           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 882                                  bitregion_end, value);
 883           return true;
 884         }
 885     }
 886
 887   /* From here on we can assume that the field to be stored in fits
 888      within a word.  If the destination is a register, it too fits
 889      in a word.  */
 890
 891   extraction_insn insv;
 892   if (!MEM_P (op0)
 893       && get_best_reg_extraction_insn (&insv, EP_insv,
 894                                        GET_MODE_BITSIZE (GET_MODE (op0)),
 895                                        fieldmode)
 896       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 897     return true;
 898
 899   /* If OP0 is a memory, try copying it to a register and seeing if a
 900      cheap register alternative is available.  */
 901   if (MEM_P (op0))
 902     {
 903       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
 904                                         fieldmode)
 905           && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 906         return true;
 907
 908       rtx_insn *last = get_last_insn ();
 909
 910       /* Try loading part of OP0 into a register, inserting the bitfield
 911          into that, and then copying the result back to OP0.  */
 912       unsigned HOST_WIDE_INT bitpos;
 913       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
 914                                                bitregion_start, bitregion_end,
 915                                                fieldmode, &bitpos);
 916       if (xop0)
 917         {
 918           rtx tempreg = copy_to_reg (xop0);
 919           if (store_bit_field_1 (tempreg, bitsize, bitpos,
 920                                  bitregion_start, bitregion_end,
 921                                  fieldmode, orig_value, false))
 922             {
 923               emit_move_insn (xop0, tempreg);
 924               return true;
 925             }
 926           delete_insns_since (last);
 927         }
 928     }
 929
 930   if (!fallback_p)
 931     return false;
 932
 933   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
 934                          bitregion_end, value);
 935   return true;
 936 }
 937
 938 /* Generate code to store value from rtx VALUE
 939    into a bit-field within structure STR_RTX
 940    containing BITSIZE bits starting at bit BITNUM.
 941
 942    BITREGION_START is the bit position of the first bitfield in this region.
 943    BITREGION_END is the bit position of the ending bitfield in this region.
 944    These two values are 0 if the C++ memory model does not apply,
 945    or we are not interested in keeping track of bitfield regions.
 946
 947    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.

        Fields accepted by strict_volatile_bitfield_p are stored right here;
        every other field is handed to store_bit_field_1 with FALLBACK_P
        true.  */
 948
 949 void
 950 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 951                  unsigned HOST_WIDE_INT bitnum,
 952                  unsigned HOST_WIDE_INT bitregion_start,
 953                  unsigned HOST_WIDE_INT bitregion_end,
 954                  machine_mode fieldmode,
 955                  rtx value)
 956 {
 957   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
 958   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
 959                                   bitregion_start, bitregion_end))
 960     {
 961       /* Storing any naturally aligned field can be done with a simple
 962          store.  For targets that support fast unaligned memory, any
 963          naturally sized, unit aligned field can be done directly.  */
 964       if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, fieldmode))
 965         {
 966           str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
 967                                              bitnum / BITS_PER_UNIT);
 968           emit_move_insn (str_rtx, value);
 969         }
 970       else
 971         {
 972           str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
 973                                           &bitnum);
 974           /* Explicitly override the C/C++ memory model; ignore the
 975              bit range so that we can do the access in the mode mandated
 976              by -fstrict-volatile-bitfields instead.  */
 977           store_fixed_bit_field_1 (str_rtx, bitsize, bitnum, value);
 978         }
 979
 980       return;
 981     }
 982
 983   /* Under the C++11 memory model, we must not touch bits outside the
 984      bit region.  Adjust the address to start at the beginning of the
 985      bit region.  BITNUM and BITREGION_END are rebased to match, and
          BITREGION_START becomes 0.  */
 986   if (MEM_P (str_rtx) && bitregion_start > 0)
 987     {
 988       machine_mode bestmode;
 989       HOST_WIDE_INT offset, size;
 990
 991       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
 992
 993       offset = bitregion_start / BITS_PER_UNIT;  /* Region start, in bytes.  */
 994       bitnum -= bitregion_start;
           /* Bytes from the region start through the last bit of the field,
              rounded up.  */
 995       size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
 996       bitregion_end -= bitregion_start;
 997       bitregion_start = 0;
 998       bestmode = get_best_mode (bitsize, bitnum,
 999                                 bitregion_start, bitregion_end,
1000                                 MEM_ALIGN (str_rtx), VOIDmode,
1001                                 MEM_VOLATILE_P (str_rtx));
1002       str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1003     }
1004
       /* FALLBACK_P is true here, so a false return is treated as a bug.  */
1005   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1006                           bitregion_start, bitregion_end,
1007                           fieldmode, value, true))
1008     gcc_unreachable ();
1009 }
1010 \f
1011 /* Use shifts and boolean operations to store VALUE into a bit field of
1012    width BITSIZE in OP0, starting at bit BITNUM.

        If OP0 is a MEM, get_best_mode is given BITREGION_START and
        BITREGION_END and asked for an access mode; when it returns VOIDmode
        the store is delegated to store_split_bit_field.  Otherwise OP0 is
        narrowed to that mode (which also updates BITNUM) and
        store_fixed_bit_field_1 does the store.  A non-MEM OP0 goes to
        store_fixed_bit_field_1 unchanged.  */
1013
1014 static void
1015 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1016                        unsigned HOST_WIDE_INT bitnum,
1017                        unsigned HOST_WIDE_INT bitregion_start,
1018                        unsigned HOST_WIDE_INT bitregion_end,
1019                        rtx value)
1020 {
1021   /* There is a case not handled here:
1022      a structure with a known alignment of just a halfword
1023      and a field split across two aligned halfwords within the structure.
1024      Or likewise a structure with a known alignment of just a byte
1025      and a field split across two bytes.
1026      Such cases are not supposed to be able to occur.  */
1027
1028   if (MEM_P (op0))
1029     {
1030       machine_mode mode = GET_MODE (op0);
           /* Substitute word_mode when OP0's mode has zero bit size or is
              wider than a word.  MODE is then passed on to get_best_mode
              (presumably as the widest mode it may return -- confirm
              against get_best_mode).  */
1031       if (GET_MODE_BITSIZE (mode) == 0
1032           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1033         mode = word_mode;
1034       mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1035                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1036
1037       if (mode == VOIDmode)
1038         {
1039           /* The only way this should occur is if the field spans word
1040              boundaries.  */
1041           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1042                                  bitregion_end, value);
1043           return;
1044         }
1045
1046       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1047     }
1048
1049   store_fixed_bit_field_1 (op0, bitsize, bitnum, value);
1050 }
1051
1052 /* Helper function for store_fixed_bit_field, stores
1053    the bit field always using the MODE of OP0.

        OP0's mode must be a scalar integer mode (asserted below).  The new
        contents of OP0 are built in registers: the field's bits are cleared
        with an AND, then VALUE, masked to BITSIZE bits where necessary and
        shifted left by BITNUM, is IORed in.  The AND is skipped when VALUE
        is a constant whose low BITSIZE bits are all ones, and the IOR is
        skipped when they are all zeros.  */
1054
1055 static void
1056 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1057                          unsigned HOST_WIDE_INT bitnum,
1058                          rtx value)
1059 {
1060   machine_mode mode;
1061   rtx temp;
1062   int all_zero = 0;  /* Set if constant VALUE has no bits set in the field.  */
1063   int all_one = 0;   /* Set if constant VALUE has every field bit set.  */
1064
1065   mode = GET_MODE (op0);
1066   gcc_assert (SCALAR_INT_MODE_P (mode));
1067
1068   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1069      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1070
1071   if (BYTES_BIG_ENDIAN)
1072     /* BITNUM is the distance between our msb
1073        and that of the containing datum.
1074        Convert it to the distance from the lsb.  */
1075     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1076
1077   /* Now BITNUM is always the distance between our lsb
1078      and that of OP0.  */
1079
1080   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1081      we must first convert its mode to MODE.  */
1082
1083   if (CONST_INT_P (value))
1084     {
1085       unsigned HOST_WIDE_INT v = UINTVAL (value);
1086
           /* Truncate the constant to BITSIZE bits.  The guard keeps the
              shift count below the width of HOST_WIDE_INT.  */
1087       if (bitsize < HOST_BITS_PER_WIDE_INT)
1088         v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;
1089
1090       if (v == 0)
1091         all_zero = 1;
1092       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1093                 && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
1094                || (bitsize == HOST_BITS_PER_WIDE_INT
1095                    && v == (unsigned HOST_WIDE_INT) -1))
1096         all_one = 1;
1097
1098       value = lshift_value (mode, v, bitnum);
1099     }
1100   else
1101     {
           /* Masking is needed only if VALUE's mode is not exactly BITSIZE
              bits wide and the field does not end at the top of MODE
              (presumably because the left shift below then discards any
              excess high bits).  */
1102       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1103                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1104
1105       if (GET_MODE (value) != mode)
1106         value = convert_to_mode (mode, value, 1);
1107
1108       if (must_and)
1109         value = expand_binop (mode, and_optab, value,
1110                               mask_rtx (mode, 0, bitsize, 0),
1111                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1112       if (bitnum > 0)
1113         value = expand_shift (LSHIFT_EXPR, mode, value,
1114                               bitnum, NULL_RTX, 1);
1115     }
1116
1117   /* Now clear the chosen bits in OP0,
1118      except that if VALUE is -1 we need not bother.  */
1119   /* We keep the intermediates in registers to allow CSE to combine
1120      consecutive bitfield assignments.  */
1121
1122   temp = force_reg (mode, op0);
1123
1124   if (! all_one)
1125     {
1126       temp = expand_binop (mode, and_optab, temp,
1127                            mask_rtx (mode, bitnum, bitsize, 1),
1128                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1129       temp = force_reg (mode, temp);
1130     }
1131
1132   /* Now logical-or VALUE into OP0, unless it is zero.  */
1133
1134   if (! all_zero)
1135     {
1136       temp = expand_binop (mode, ior_optab, temp, value,
1137                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1138       temp = force_reg (mode, temp);
1139     }
1140
       /* Write the result back unless TEMP is OP0 itself.  The move is done
          into a copy_rtx of OP0.  */
1141   if (op0 != temp)
1142     {
1143       op0 = copy_rtx (op0);
1144       emit_move_insn (op0, temp);
1145     }
1146 }
1147 \f
1148 /* Store a bit field that is split across multiple accessible memory objects.
1149
1150    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1151    BITSIZE is the field width; BITPOS is the position of its first bit
1152    (within the word).
1153    VALUE is the value to store.
        BITREGION_START and BITREGION_END are forwarded to
        store_fixed_bit_field; a nonzero BITREGION_END additionally makes
        the loop below halve the access unit near the end of the region
        when OP0 is not a register.

        The field is stored piece by piece: each iteration takes the next
        THISSIZE bits of VALUE and stores them with store_fixed_bit_field.
1154
1155    This does not yet handle fields wider than BITS_PER_WORD.  */
1156
1157 static void
1158 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1159                        unsigned HOST_WIDE_INT bitpos,
1160                        unsigned HOST_WIDE_INT bitregion_start,
1161                        unsigned HOST_WIDE_INT bitregion_end,
1162                        rtx value)
1163 {
1164   unsigned int unit;          /* Size in bits of one access.  */
1165   unsigned int bitsdone = 0;  /* Bits of the field stored so far.  */
1166
1167   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1168      much at a time.  */
1169   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1170     unit = BITS_PER_WORD;
1171   else
1172     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1173
1174   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1175      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1176      again, and we will mutually recurse forever.  */
1177   if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1178     unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1179
1180   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1181      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1182      that VALUE might be a floating-point constant.  */
1183   if (CONSTANT_P (value) && !CONST_INT_P (value))
1184     {
1185       rtx word = gen_lowpart_common (word_mode, value);
1186
1187       if (word && (value != word))
1188         value = word;
1189       else
1190         value = gen_lowpart_common (word_mode,
1191                                     force_reg (GET_MODE (value) != VOIDmode
1192                                                ? GET_MODE (value)
1193                                                : word_mode, value));
1194     }
1195
1196   while (bitsdone < bitsize)
1197     {
1198       unsigned HOST_WIDE_INT thissize;
1199       rtx part, word;
1200       unsigned HOST_WIDE_INT thispos;
1201       unsigned HOST_WIDE_INT offset;
1202
           /* OFFSET is the number of whole UNITs before the current piece;
              THISPOS is the piece's bit position inside its UNIT.  */
1203       offset = (bitpos + bitsdone) / unit;
1204       thispos = (bitpos + bitsdone) % unit;
1205
1206       /* When region of bytes we can touch is restricted, decrease
1207          UNIT close to the end of the region as needed.  If op0 is a REG
1208          or SUBREG of REG, don't do this, as there can't be data races
1209          on a register and we can expand shorter code in some cases.
              The iteration is restarted so that OFFSET and THISPOS are
              recomputed for the smaller UNIT.  */
1210       if (bitregion_end
1211           && unit > BITS_PER_UNIT
1212           && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1213           && !REG_P (op0)
1214           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1215         {
1216           unit = unit / 2;
1217           continue;
1218         }
1219
1220       /* THISSIZE must not overrun a word boundary.  Otherwise,
1221          store_fixed_bit_field will call us again, and we will mutually
1222          recurse forever.  */
1223       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1224       thissize = MIN (thissize, unit - thispos);
1225
1226       if (BYTES_BIG_ENDIAN)
1227         {
1228           /* Fetch successively less significant portions.  */
1229           if (CONST_INT_P (value))
1230             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1231                              >> (bitsize - bitsdone - thissize))
1232                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1233           else
1234             {
1235               int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1236               /* The args are chosen so that the last part includes the
1237                  lsb.  Give extract_bit_field the value it needs (with
1238                  endianness compensation) to fetch the piece we want.  */
1239               part = extract_fixed_bit_field (word_mode, value, thissize,
1240                                               total_bits - bitsize + bitsdone,
1241                                               NULL_RTX, 1);
1242             }
1243         }
1244       else
1245         {
1246           /* Fetch successively more significant portions.  */
1247           if (CONST_INT_P (value))
1248             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1249                              >> bitsdone)
1250                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1251           else
1252             part = extract_fixed_bit_field (word_mode, value, thissize,
1253                                             bitsdone, NULL_RTX, 1);
1254         }
1255
1256       /* If OP0 is a register, then handle OFFSET here.
1257
1258          When handling multiword bitfields, extract_bit_field may pass
1259          down a word_mode SUBREG of a larger REG for a bitfield that actually
1260          crosses a word boundary.  Thus, for a SUBREG, we must find
1261          the current word starting from the base register.
              In both register cases, a register narrower than a word has
              only word 0; any other word index selects const0_rtx, which
              is skipped below.  Afterwards OFFSET is reduced to the UNIT
              index inside the selected word (the mask assumes
              BITS_PER_WORD / UNIT is a power of two).  */
1262       if (GET_CODE (op0) == SUBREG)
1263         {
1264           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1265                             + (offset * unit / BITS_PER_WORD);
1266           machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1267           if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1268             word = word_offset ? const0_rtx : op0;
1269           else
1270             word = operand_subword_force (SUBREG_REG (op0), word_offset,
1271                                           GET_MODE (SUBREG_REG (op0)));
1272           offset &= BITS_PER_WORD / unit - 1;
1273         }
1274       else if (REG_P (op0))
1275         {
1276           machine_mode op0_mode = GET_MODE (op0);
1277           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1278             word = offset ? const0_rtx : op0;
1279           else
1280             word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1281                                           GET_MODE (op0));
1282           offset &= BITS_PER_WORD / unit - 1;
1283         }
1284       else
1285         word = op0;
1286
1287       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1288          it is just an out-of-bounds access.  Ignore it.  */
1289       if (word != const0_rtx)
1290         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1291                                bitregion_start, bitregion_end, part);
1292       bitsdone += thissize;
1293     }
1294 }
1295 \f
1296 /* A subroutine of extract_bit_field_1 that converts return value X
1297    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1298    to extract_bit_field.

        X is returned unchanged if it already has mode TMODE or MODE;
        otherwise the result has mode TMODE.  */
1299
1300 static rtx
1301 convert_extracted_bit_field (rtx x, machine_mode mode,
1302                              machine_mode tmode, bool unsignedp)
1303 {
1304   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1305     return x;
1306
1307   /* If TMODE is not a scalar integer mode, first convert X to the
1308      integer mode of TMODE's size, force it into a register, and then
1309      access that register in TMODE via gen_lowpart.  */
1310   if (!SCALAR_INT_MODE_P (tmode))
1311     {
1312       machine_mode smode;
1313
1314       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1315       x = convert_to_mode (smode, x, unsignedp);
1316       x = force_reg (smode, x);
1317       return gen_lowpart (tmode, x);
1318     }
1319
1320   return convert_to_mode (tmode, x, unsignedp);
1321 }
1322
1323 /* Try to use an ext(z)v pattern to extract a field from OP0.
1324    Return the extracted value on success, otherwise return null.
1325    EXTV describes the pattern to use; its field_mode is the mode of the
1326    extraction (EXT_MODE below).  The other arguments are as for
        extract_bit_field.  Null is also returned, without emitting
        anything, if BITSIZE is zero or wider than EXT_MODE, or if OP0 is
        a SUBREG whose mode is not EXT_MODE.  */
1327
1328 static rtx
1329 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1330                               unsigned HOST_WIDE_INT bitsize,
1331                               unsigned HOST_WIDE_INT bitnum,
1332                               int unsignedp, rtx target,
1333                               machine_mode mode, machine_mode tmode)
1334 {
1335   struct expand_operand ops[4];
       /* SPEC_TARGET remembers the destination as requested (or the fresh
          TMODE register created below).  SPEC_TARGET_SUBREG is set when
          TARGET is replaced by a wider EXT_MODE lowpart of SPEC_TARGET.  */
1336   rtx spec_target = target;
1337   rtx spec_target_subreg = 0;
1338   machine_mode ext_mode = extv->field_mode;
1339   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1340
1341   if (bitsize == 0 || unit < bitsize)
1342     return NULL_RTX;
1343
1344   if (MEM_P (op0))
1345     /* Get a reference to the first byte of the field.  */
1346     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1347                                 &bitnum);
1348   else
1349     {
1350       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1351       if (BYTES_BIG_ENDIAN)
1352         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1353
1354       /* If op0 is a register, we need it in EXT_MODE to make it
1355          acceptable to the format of ext(z)v.  A SUBREG in another mode
              is rejected; a REG in another mode is wrapped in a lowpart
              SUBREG.  */
1356       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1357         return NULL_RTX;
1358       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1359         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1360     }
1361
1362   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1363      "backwards" from the size of the unit we are extracting from.
1364      Otherwise, we count bits from the most significant on a
1365      BYTES/BITS_BIG_ENDIAN machine.  */
1366
1367   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1368     bitnum = unit - bitsize - bitnum;
1369
1370   if (target == 0)
1371     target = spec_target = gen_reg_rtx (tmode);
1372
1373   if (GET_MODE (target) != ext_mode)
1374     {
1375       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1376          between the mode of the extraction (word_mode) and the target
1377          mode.  Instead, create a temporary and use convert_move to set
1378          the target.  */
1379       if (REG_P (target)
1380           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1381         {
1382           target = gen_lowpart (ext_mode, target);
1383           if (GET_MODE_PRECISION (ext_mode)
1384               > GET_MODE_PRECISION (GET_MODE (spec_target)))
1385             spec_target_subreg = target;
1386         }
1387       else
1388         target = gen_reg_rtx (ext_mode);
1389     }
1390
       /* Operands: destination, source, field width, field position.  */
1391   create_output_operand (&ops[0], target, ext_mode);
1392   create_fixed_operand (&ops[1], op0);
1393   create_integer_operand (&ops[2], bitsize);
1394   create_integer_operand (&ops[3], bitnum);
1395   if (maybe_expand_insn (extv->icode, 4, ops))
1396     {
           /* If the result landed in SPEC_TARGET itself, or in the wider
              lowpart of it recorded in SPEC_TARGET_SUBREG, return
              SPEC_TARGET.  Otherwise convert the result to MODE or
              TMODE.  */
1397       target = ops[0].value;
1398       if (target == spec_target)
1399         return target;
1400       if (target == spec_target_subreg)
1401         return spec_target;
1402       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1403     }
1404   return NULL_RTX;
1405 }
1406
1407 /* A subroutine of extract_bit_field, with the same arguments.
1408    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1409    if we can find no other means of implementing the operation.
1410    If FALLBACK_P is false, return NULL instead.  */
1411
1412 static rtx
1413 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1414                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1415                      machine_mode mode, machine_mode tmode,
1416                      bool fallback_p)
1417 {
1418   rtx op0 = str_rtx;
1419   machine_mode int_mode;
1420   machine_mode mode1;
1421
1422   if (tmode == VOIDmode)
1423     tmode = mode;
1424
1425   while (GET_CODE (op0) == SUBREG)
1426     {
1427       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1428       op0 = SUBREG_REG (op0);
1429     }
1430
1431   /* If we have an out-of-bounds access to a register, just return an
1432      uninitialized register of the required mode.  This can occur if the
1433      source code contains an out-of-bounds access to a small array.  */
1434   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1435     return gen_reg_rtx (tmode);
1436
1437   if (REG_P (op0)
1438       && mode == GET_MODE (op0)
1439       && bitnum == 0
1440       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1441     {
1442       /* We're trying to extract a full register from itself.  */
1443       return op0;
1444     }
1445
1446   /* See if we can get a better vector mode before extracting.  */
1447   if (VECTOR_MODE_P (GET_MODE (op0))
1448       && !MEM_P (op0)
1449       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1450     {
1451       machine_mode new_mode;
1452
1453       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1454         new_mode = MIN_MODE_VECTOR_FLOAT;
1455       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1456         new_mode = MIN_MODE_VECTOR_FRACT;
1457       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1458         new_mode = MIN_MODE_VECTOR_UFRACT;
1459       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1460         new_mode = MIN_MODE_VECTOR_ACCUM;
1461       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1462         new_mode = MIN_MODE_VECTOR_UACCUM;
1463       else
1464         new_mode = MIN_MODE_VECTOR_INT;
1465
1466       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1467         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1468             && targetm.vector_mode_supported_p (new_mode))
1469           break;
1470       if (new_mode != VOIDmode)
1471         op0 = gen_lowpart (new_mode, op0);
1472     }
1473
1474   /* Use vec_extract patterns for extracting parts of vectors whenever
1475      available.  */
1476   if (VECTOR_MODE_P (GET_MODE (op0))
1477       && !MEM_P (op0)
1478       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1479       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1480           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1481     {
1482       struct expand_operand ops[3];
1483       machine_mode outermode = GET_MODE (op0);
1484       machine_mode innermode = GET_MODE_INNER (outermode);
1485       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1486       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1487
1488       create_output_operand (&ops[0], target, innermode);
1489       create_input_operand (&ops[1], op0, outermode);
1490       create_integer_operand (&ops[2], pos);
1491       if (maybe_expand_insn (icode, 3, ops))
1492         {
1493           target = ops[0].value;
1494           if (GET_MODE (target) != mode)
1495             return gen_lowpart (tmode, target);
1496           return target;
1497         }
1498     }
1499
1500   /* Make sure we are playing with integral modes.  Pun with subregs
1501      if we aren't.  */
1502   {
1503     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1504     if (imode != GET_MODE (op0))
1505       {
1506         if (MEM_P (op0))
1507           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1508         else if (imode != BLKmode)
1509           {
1510             op0 = gen_lowpart (imode, op0);
1511
1512             /* If we got a SUBREG, force it into a register since we
1513                aren't going to be able to do another SUBREG on it.  */
1514             if (GET_CODE (op0) == SUBREG)
1515               op0 = force_reg (imode, op0);
1516           }
1517         else if (REG_P (op0))
1518           {
1519             rtx reg, subreg;
1520             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1521                                             MODE_INT);
1522             reg = gen_reg_rtx (imode);
1523             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1524             emit_move_insn (subreg, op0);
1525             op0 = reg;
1526             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1527           }
1528         else
1529           {
1530             HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1531             rtx mem = assign_stack_temp (GET_MODE (op0), size);
1532             emit_move_insn (mem, op0);
1533             op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1534           }
1535       }
1536   }
1537
1538   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1539      If that's wrong, the solution is to test for it and set TARGET to 0
1540      if needed.  */
1541
1542   /* Get the mode of the field to use for atomic access or subreg
1543      conversion.  */
1544   mode1 = mode;
1545   if (SCALAR_INT_MODE_P (tmode))
1546     {
1547       machine_mode try_mode = mode_for_size (bitsize,
1548                                                   GET_MODE_CLASS (tmode), 0);
1549       if (try_mode != BLKmode)
1550         mode1 = try_mode;
1551     }
1552   gcc_assert (mode1 != BLKmode);
1553
1554   /* Extraction of a full MODE1 value can be done with a subreg as long
1555      as the least significant bit of the value is the least significant
1556      bit of either OP0 or a word of OP0.  */
1557   if (!MEM_P (op0)
1558       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1559       && bitsize == GET_MODE_BITSIZE (mode1)
1560       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1561     {
1562       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1563                                      bitnum / BITS_PER_UNIT);
1564       if (sub)
1565         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1566     }
1567
1568   /* Extraction of a full MODE1 value can be done with a load as long as
1569      the field is on a byte boundary and is sufficiently aligned.  */
1570   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1571     {
1572       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1573       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1574     }
1575
1576   /* Handle fields bigger than a word.  */
1577
1578   if (bitsize > BITS_PER_WORD)
1579     {
1580       /* Here we transfer the words of the field
1581          in the order least significant first.
1582          This is because the most significant word is the one which may
1583          be less than full.  */
1584
1585       unsigned int backwards = WORDS_BIG_ENDIAN;
1586       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1587       unsigned int i;
1588       rtx_insn *last;
1589
1590       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1591         target = gen_reg_rtx (mode);
1592
1593       /* Indicate for flow that the entire target reg is being set.  */
1594       emit_clobber (target);
1595
1596       last = get_last_insn ();
1597       for (i = 0; i < nwords; i++)
1598         {
1599           /* If I is 0, use the low-order word in both field and target;
1600              if I is 1, use the next to lowest word; and so on.  */
1601           /* Word number in TARGET to use.  */
1602           unsigned int wordnum
1603             = (backwards
1604                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1605                : i);
1606           /* Offset from start of field in OP0.  */
1607           unsigned int bit_offset = (backwards
1608                                      ? MAX ((int) bitsize - ((int) i + 1)
1609                                             * BITS_PER_WORD,
1610                                             0)
1611                                      : (int) i * BITS_PER_WORD);
1612           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1613           rtx result_part
1614             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1615                                              bitsize - i * BITS_PER_WORD),
1616                                    bitnum + bit_offset, 1, target_part,
1617                                    mode, word_mode, fallback_p);
1618
1619           gcc_assert (target_part);
1620           if (!result_part)
1621             {
1622               delete_insns_since (last);
1623               return NULL;
1624             }
1625
1626           if (result_part != target_part)
1627             emit_move_insn (target_part, result_part);
1628         }
1629
1630       if (unsignedp)
1631         {
1632           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1633              need to be zero'd out.  */
1634           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1635             {
1636               unsigned int i, total_words;
1637
1638               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1639               for (i = nwords; i < total_words; i++)
1640                 emit_move_insn
1641                   (operand_subword (target,
1642                                     backwards ? total_words - i - 1 : i,
1643                                     1, VOIDmode),
1644                    const0_rtx);
1645             }
1646           return target;
1647         }
1648
1649       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1650       target = expand_shift (LSHIFT_EXPR, mode, target,
1651                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1652       return expand_shift (RSHIFT_EXPR, mode, target,
1653                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1654     }
1655
1656   /* If OP0 is a multi-word register, narrow it to the affected word.
1657      If the region spans two words, defer to extract_split_bit_field.  */
1658   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1659     {
1660       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1661                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1662       bitnum %= BITS_PER_WORD;
1663       if (bitnum + bitsize > BITS_PER_WORD)
1664         {
1665           if (!fallback_p)
1666             return NULL_RTX;
1667           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1668           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1669         }
1670     }
1671
1672   /* From here on we know the desired field is smaller than a word.
1673      If OP0 is a register, it too fits within a word.  */
1674   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1675   extraction_insn extv;
1676   if (!MEM_P (op0)
1677       /* ??? We could limit the structure size to the part of OP0 that
1678          contains the field, with appropriate checks for endianness
1679          and TRULY_NOOP_TRUNCATION.  */
1680       && get_best_reg_extraction_insn (&extv, pattern,
1681                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1682                                        tmode))
1683     {
1684       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1685                                                  unsignedp, target, mode,
1686                                                  tmode);
1687       if (result)
1688         return result;
1689     }
1690
1691   /* If OP0 is a memory, try copying it to a register and seeing if a
1692      cheap register alternative is available.  */
1693   if (MEM_P (op0))
1694     {
1695       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1696                                         tmode))
1697         {
1698           rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1699                                                      bitnum, unsignedp,
1700                                                      target, mode,
1701                                                      tmode);
1702           if (result)
1703             return result;
1704         }
1705
1706       rtx_insn *last = get_last_insn ();
1707
1708       /* Try loading part of OP0 into a register and extracting the
1709          bitfield from that.  */
1710       unsigned HOST_WIDE_INT bitpos;
1711       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1712                                                0, 0, tmode, &bitpos);
1713       if (xop0)
1714         {
1715           xop0 = copy_to_reg (xop0);
1716           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1717                                             unsignedp, target,
1718                                             mode, tmode, false);
1719           if (result)
1720             return result;
1721           delete_insns_since (last);
1722         }
1723     }
1724
1725   if (!fallback_p)
1726     return NULL;
1727
1728   /* Find a correspondingly-sized integer field, so we can apply
1729      shifts and masks to it.  */
1730   int_mode = int_mode_for_mode (tmode);
1731   if (int_mode == BLKmode)
1732     int_mode = int_mode_for_mode (mode);
1733   /* Should probably push op0 out to memory and then do a load.  */
1734   gcc_assert (int_mode != BLKmode);
1735
1736   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1737                                     target, unsignedp);
1738   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1739 }
1740
1741 /* Emit RTL that reads the bit-field of BITSIZE bits lying BITNUM bits
1742    into STR_RTX, and return an rtx for wherever the value ends up.
1743
1744    STR_RTX is the containing object (a REG or MEM).
1745    UNSIGNEDP is nonzero for an unsigned bit-field.
1746    MODE is the natural mode of the extracted value.
1747    TMODE is the mode the caller would prefer, although the value may
1748    come back in MODE instead.
1749
1750    TARGET, when nonzero, is only a suggestion: it is used and returned
1751    if storing there costs nothing extra.  Otherwise the result is a REG
1752    of mode TMODE or MODE, TMODE being preferred when both are equally
1753    easy.  */
1754
1755 rtx
1756 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1757                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1758                    machine_mode mode, machine_mode tmode)
1759 {
1760   /* Mode used to test whether -fstrict-volatile-bitfields applies:
1761      STR_RTX's own mode if it has a size, else TARGET's, else TMODE.  */
1762   machine_mode access_mode = tmode;
1763   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1764     access_mode = GET_MODE (str_rtx);
1765   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1766     access_mode = GET_MODE (target);
1767
1768   /* The common case: no strict-volatile handling is needed, so let
1769      the general routine do the work with fallbacks enabled.  */
1770   if (!strict_volatile_bitfield_p (str_rtx, bitsize, bitnum,
1771                                    access_mode, 0, 0))
1772     return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1773                                 target, mode, tmode, true);
1774
1775   rtx value;
1776   if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, access_mode))
1777     {
1778       /* A whole ACCESS_MODE value on a byte boundary with sufficient
1779          alignment: a plain load is enough.  */
1780       value = adjust_bitfield_address (str_rtx, access_mode,
1781                                        bitnum / BITS_PER_UNIT);
1782     }
1783   else
1784     {
1785       /* Narrow the reference to ACCESS_MODE (the helper is handed
1786          &BITNUM, presumably to rebase it), then shift and mask.  */
1787       str_rtx = narrow_bit_field_mem (str_rtx, access_mode, bitsize,
1788                                       bitnum, &bitnum);
1789       value = extract_fixed_bit_field_1 (mode, str_rtx, bitsize, bitnum,
1790                                          target, unsignedp);
1791     }
1792
1793   return convert_extracted_bit_field (value, mode, tmode, unsignedp);
1794 }
1795 \f
1796 /* Extract the BITSIZE-bit field that starts at bit BITNUM of OP0, using
1797    shifts and boolean operations.  UNSIGNEDP is nonzero for an unsigned
1798    field (no sign extension).
1799
1800    TARGET, if nonzero, is a suggested place for the value, with no
1801    guarantee that it is used; otherwise a pseudo-reg of mode TMODE is.  */
1802
1803 static rtx
1804 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1805                          unsigned HOST_WIDE_INT bitsize,
1806                          unsigned HOST_WIDE_INT bitnum, rtx target,
1807                          int unsignedp)
1808 {
1809   /* Anything that is not a MEM is handled in its own mode as is.  */
1810   if (!MEM_P (op0))
1811     return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1812                                       target, unsignedp);
1813
1814   machine_mode access_mode
1815     = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1816                      MEM_VOLATILE_P (op0));
1817
1818   /* No mode found: this should only happen when the field spans word
1819      boundaries, so assemble it piecewise.  */
1820   if (access_mode == VOIDmode)
1821     return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1822
1823   op0 = narrow_bit_field_mem (op0, access_mode, bitsize, bitnum, &bitnum);
1824   return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1825                                     target, unsignedp);
1826 }
1827
1828 /* Helper function for extract_fixed_bit_field: extracts the bit field
1829    always using the mode of OP0, which must be a scalar integer mode.  */
1830
1831 static rtx
1832 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1833                            unsigned HOST_WIDE_INT bitsize,
1834                            unsigned HOST_WIDE_INT bitnum, rtx target,
1835                            int unsignedp)
1836 {
1837   machine_mode mode = GET_MODE (op0);
1838   gcc_assert (SCALAR_INT_MODE_P (mode));
1839
1840   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1841      for invalid input, such as extract equivalent of f5 from
1842      gcc.dg/pr48335-2.c.  */
1843
1844   if (BYTES_BIG_ENDIAN)
1845     /* BITNUM is the distance between our msb and that of OP0.
1846        Convert it to the distance from the lsb.  */
1847     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1848
1849   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1850      We have reduced the big-endian case to the little-endian case.  */
1851
1852   if (unsignedp)
1853     {
1854       if (bitnum)
1855         {
1856           /* If the field does not already start at the lsb,
1857              shift it so it does.  */
1858           /* Maybe propagate the target for the shift (only if TMODE is MODE).  */
1859           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1860           if (tmode != mode)
1861             subtarget = 0;
1862           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1863         }
1864       /* Convert the value to the desired mode.  */
1865       if (mode != tmode)
1866         op0 = convert_to_mode (tmode, op0, 1);
1867
1868       /* Unless the msb of the field used to be the msb when we shifted,
1869          mask out the upper bits.  */
1870
1871       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1872         return expand_binop (GET_MODE (op0), and_optab, op0,
1873                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1874                              target, 1, OPTAB_LIB_WIDEN);
1875       return op0;
1876     }
1877
1878   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1879      then arithmetic-shift its lsb to the lsb of the word.  */
1880   op0 = force_reg (mode, op0);
1881
1882   /* Find the narrowest integer mode that contains the field.  */
1883
1884   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1885        mode = GET_MODE_WIDER_MODE (mode))
1886     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1887       {
1888         op0 = convert_to_mode (mode, op0, 0);
1889         break;
1890       }
1891   /* TARGET is of no use for the shifts below unless MODE is TMODE.  */
1892   if (mode != tmode)
1893     target = 0;
1894   /* Left-shift so that the msb of the field becomes the msb of MODE.  */
1895   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1896     {
1897       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1898       /* Maybe propagate the target for the shift.  */
1899       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1900       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1901     }
1902   /* Arithmetic right shift: brings the lsb of the field down to bit 0.  */
1903   return expand_shift (RSHIFT_EXPR, mode, op0,
1904                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1905 }
1906
1907 /* Return a constant integer rtx of mode MODE (CONST_INT or CONST_DOUBLE)
1908    with the value VALUE << BITPOS.  */
1909
1910 static rtx
1911 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
1912               int bitpos)
1913 {
1914   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
1915 }
1916 \f
1917 /* Extract a bit field that is split across two words
1918    and return an RTX for the result, which is computed in word_mode.
1919
1920    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1921    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1922    UNSIGNEDP is nonzero if we should zero-extend the contents; else sign-extend.  */
1923
1924 static rtx
1925 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1926                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1927 {
1928   unsigned int unit;  /* Size in bits of the pieces OP0 is addressed in.  */
1929   unsigned int bitsdone = 0;  /* Bits of the field extracted so far.  */
1930   rtx result = NULL_RTX;
1931   int first = 1;  /* Nonzero until the first part has been extracted.  */
1932
1933   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1934      much at a time.  */
1935   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1936     unit = BITS_PER_WORD;
1937   else
1938     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1939
1940   while (bitsdone < bitsize)
1941     {
1942       unsigned HOST_WIDE_INT thissize;
1943       rtx part, word;
1944       unsigned HOST_WIDE_INT thispos;
1945       unsigned HOST_WIDE_INT offset;
1946
1947       offset = (bitpos + bitsdone) / unit;
1948       thispos = (bitpos + bitsdone) % unit;
1949
1950       /* THISSIZE must not overrun a word boundary.  Otherwise,
1951          extract_fixed_bit_field will call us again, and we will mutually
1952          recurse forever.  */
1953       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1954       thissize = MIN (thissize, unit - thispos);
1955
1956       /* If OP0 is a register, then handle OFFSET here.
1957
1958          When handling multiword bitfields, extract_bit_field may pass
1959          down a word_mode SUBREG of a larger REG for a bitfield that actually
1960          crosses a word boundary.  Thus, for a SUBREG, we must find
1961          the current word starting from the base register.  */
1962       if (GET_CODE (op0) == SUBREG)
1963         {
1964           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1965           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1966                                         GET_MODE (SUBREG_REG (op0)));
1967           offset = 0;
1968         }
1969       else if (REG_P (op0))
1970         {
1971           word = operand_subword_force (op0, offset, GET_MODE (op0));
1972           offset = 0;
1973         }
1974       else
1975         word = op0;
1976
1977       /* Extract the parts in bit-counting order,
1978          whose meaning is determined by BYTES_BIG_ENDIAN.
1979          OFFSET is in UNITs, and UNIT is in bits.  */
1980       part = extract_fixed_bit_field (word_mode, word, thissize,
1981                                       offset * unit + thispos, 0, 1);  /* No target; unsigned.  */
1982       bitsdone += thissize;
1983
1984       /* Shift this part into place for the result.  */
1985       if (BYTES_BIG_ENDIAN)
1986         {
1987           if (bitsize != bitsdone)
1988             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1989                                  bitsize - bitsdone, 0, 1);
1990         }
1991       else
1992         {
1993           if (bitsdone != thissize)
1994             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1995                                  bitsdone - thissize, 0, 1);
1996         }
1997
1998       if (first)
1999         result = part;
2000       else
2001         /* Combine the parts with bitwise or.  This works
2002            because we extracted each part as an unsigned bit field.  */
2003         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2004                                OPTAB_LIB_WIDEN);
2005
2006       first = 0;
2007     }
2008
2009   /* Unsigned bit field: we are done.  */
2010   if (unsignedp)
2011     return result;
2012   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2013   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2014                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2015   return expand_shift (RSHIFT_EXPR, word_mode, result,
2016                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2017 }
2018 \f
2019 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2020    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2021    MODE, fill the upper bits with zeros.  Fail if the layout of either
2022    mode is unknown (as for CC modes) or if the extraction would involve
2023    unprofitable mode punning.  Return the value on success, otherwise
2024    return null.
2025
2026    This is different from gen_lowpart* in these respects:
2027
2028      - the returned value must always be considered an rvalue
2029
2030      - when MODE is wider than SRC_MODE, the extraction involves
2031        a zero extension
2032
2033      - when MODE is smaller than SRC_MODE, the extraction involves
2034        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2035
2036    In other words, this routine performs a computation, whereas the
2037    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2038    operations.  */
2039
2040 rtx
2041 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2042 {
2043   machine_mode int_mode, src_int_mode;
2044
2045   if (mode == src_mode)
2046     return src;
2047   /* Constants: simplify the lowpart subreg, else force into a register.  */
2048   if (CONSTANT_P (src))
2049     {
2050       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2051          fails, it will happily create (subreg (symbol_ref)) or similar
2052          invalid SUBREGs.  */
2053       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2054       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2055       if (ret)
2056         return ret;
2057
2058       if (GET_MODE (src) == VOIDmode
2059           || !validate_subreg (mode, src_mode, src, byte))
2060         return NULL_RTX;
2061
2062       src = force_reg (GET_MODE (src), src);
2063       return gen_rtx_SUBREG (mode, src, byte);
2064     }
2065   /* The layout of CC modes is unknown, so fail for them.  */
2066   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2067     return NULL_RTX;
2068   /* For equal-sized tieable modes, try a direct lowpart first.  */
2069   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2070       && MODES_TIEABLE_P (mode, src_mode))
2071     {
2072       rtx x = gen_lowpart_common (mode, src);
2073       if (x)
2074         return x;
2075     }
2076   /* Otherwise go via the integer modes given by int_mode_for_mode.  */
2077   src_int_mode = int_mode_for_mode (src_mode);
2078   int_mode = int_mode_for_mode (mode);
2079   if (src_int_mode == BLKmode || int_mode == BLKmode)
2080     return NULL_RTX;
2081
2082   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2083     return NULL_RTX;
2084   if (!MODES_TIEABLE_P (int_mode, mode))
2085     return NULL_RTX;
2086   /* Pun SRC to an integer, resize it as unsigned, then pun to MODE.  */
2087   src = gen_lowpart (src_int_mode, src);
2088   src = convert_modes (int_mode, src_int_mode, src, true);
2089   src = gen_lowpart (mode, src);
2090   return src;
2091 }
2092 \f
2093 /* Emit RTL that adds INC to TARGET, leaving the sum in TARGET.  */
2094
2095 void
2096 expand_inc (rtx target, rtx inc)
2097 {
2098   machine_mode sum_mode = GET_MODE (target);
2099   rtx sum = expand_binop (sum_mode, add_optab, target, inc, target,
2100                           0, OPTAB_LIB_WIDEN);
2101   if (sum != target)   /* The result was produced somewhere else.  */
2102     emit_move_insn (target, sum);
2103 }
2104
2105 /* Emit RTL that subtracts DEC from TARGET, leaving the difference in TARGET.  */
2106
2107 void
2108 expand_dec (rtx target, rtx dec)
2109 {
2110   machine_mode diff_mode = GET_MODE (target);
2111   rtx diff = expand_binop (diff_mode, sub_optab, target, dec, target,
2112                            0, OPTAB_LIB_WIDEN);
2113   if (diff != target)   /* The result was produced somewhere else.  */
2114     emit_move_insn (target, diff);
2115 }
2116 \f
2117 /* Output a shift or rotate instruction for expression code CODE
2118    (LSHIFT_EXPR, RSHIFT_EXPR, LROTATE_EXPR or RROTATE_EXPR) in mode MODE,
2119    with SHIFTED being the rtx for the value to shift, and AMOUNT the rtx
2120    for the amount to shift by.  Store the result in the rtx TARGET, if that
2121    is convenient.  If UNSIGNEDP is nonzero, do a logical shift; otherwise,
2122    arithmetic.  Return the rtx for where the value is.  */
2123
2124 static rtx
2125 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2126                 rtx amount, rtx target, int unsignedp)
2127 {
2128   rtx op1, temp = 0;
2129   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2130   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2131   optab lshift_optab = ashl_optab;
2132   optab rshift_arith_optab = ashr_optab;
2133   optab rshift_uns_optab = lshr_optab;
2134   optab lrotate_optab = rotl_optab;
2135   optab rrotate_optab = rotr_optab;
2136   machine_mode op1_mode;
2137   machine_mode scalar_mode = mode;  /* Element mode if MODE is a vector.  */
2138   int attempt;  /* Index of the expansion method being tried.  */
2139   bool speed = optimize_insn_for_speed_p ();
2140
2141   if (VECTOR_MODE_P (mode))
2142     scalar_mode = GET_MODE_INNER (mode);
2143   op1 = amount;
2144   op1_mode = GET_MODE (op1);
2145
2146   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2147      shift amount is a vector, use the vector/vector shift patterns.  */
2148   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2149     {
2150       lshift_optab = vashl_optab;
2151       rshift_arith_optab = vashr_optab;
2152       rshift_uns_optab = vlshr_optab;
2153       lrotate_optab = vrotl_optab;
2154       rrotate_optab = vrotr_optab;
2155     }
2156
2157   /* Previously detected shift-counts computed by NEGATE_EXPR
2158      and shifted in the other direction; but that does not work
2159      on all machines.  */
2160   /* With SHIFT_COUNT_TRUNCATED, wrap a constant count; strip a lowpart SUBREG.  */
2161   if (SHIFT_COUNT_TRUNCATED)
2162     {
2163       if (CONST_INT_P (op1)
2164           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2165               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2166         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2167                        % GET_MODE_BITSIZE (scalar_mode));
2168       else if (GET_CODE (op1) == SUBREG
2169                && subreg_lowpart_p (op1)
2170                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2171                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2172         op1 = SUBREG_REG (op1);
2173     }
2174
2175   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2176      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2177      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2178      amount instead.  */
2179   if (rotate
2180       && CONST_INT_P (op1)
2181       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2182                    GET_MODE_BITSIZE (scalar_mode) - 1))
2183     {
2184       op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2185       left = !left;
2186       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2187     }
2188
2189   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2190      Note that this is not the case for bigger values.  For instance a rotation
2191      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2192      0x04030201 (bswapsi).  */
2193   if (rotate
2194       && CONST_INT_P (op1)
2195       && INTVAL (op1) == BITS_PER_UNIT
2196       && GET_MODE_SIZE (scalar_mode) == 2
2197       && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2198     return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2199                                   unsignedp);
2200   /* A shift or rotate by zero leaves the value unchanged.  */
2201   if (op1 == const0_rtx)
2202     return shifted;
2203
2204   /* Check whether it is cheaper to implement a left shift by a constant
2205      bit count by a sequence of additions.  */
2206   if (code == LSHIFT_EXPR
2207       && CONST_INT_P (op1)
2208       && INTVAL (op1) > 0
2209       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2210       && INTVAL (op1) < MAX_BITS_PER_WORD
2211       && (shift_cost (speed, mode, INTVAL (op1))
2212           > INTVAL (op1) * add_cost (speed, mode))
2213       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2214     {
2215       int i;
2216       for (i = 0; i < INTVAL (op1); i++)
2217         {
2218           temp = force_reg (mode, shifted);
2219           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2220                                   unsignedp, OPTAB_LIB_WIDEN);
2221         }
2222       return shifted;
2223     }
2224   /* Try each expansion method in turn until one produces a result.  */
2225   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2226     {
2227       enum optab_methods methods;
2228
2229       if (attempt == 0)
2230         methods = OPTAB_DIRECT;
2231       else if (attempt == 1)
2232         methods = OPTAB_WIDEN;
2233       else
2234         methods = OPTAB_LIB_WIDEN;
2235
2236       if (rotate)
2237         {
2238           /* Widening does not work for rotation.  */
2239           if (methods == OPTAB_WIDEN)
2240             continue;
2241           else if (methods == OPTAB_LIB_WIDEN)
2242             {
2243               /* If we have been unable to open-code this by a rotation,
2244                  do it as the IOR of two shifts.  I.e., to rotate A
2245                  by N bits, compute
2246                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2247                  where C is the bitsize of A.
2248
2249                  It is theoretically possible that the target machine might
2250                  not be able to perform either shift and hence we would
2251                  be making two libcalls rather than just the one for the
2252                  shift (similarly if IOR could not be done).  We will allow
2253                  this extremely unlikely lossage to avoid complicating the
2254                  code below.  */
2255
2256               rtx subtarget = target == shifted ? 0 : target;
2257               rtx new_amount, other_amount;
2258               rtx temp1;
2259
2260               new_amount = op1;
2261               if (op1 == const0_rtx)  /* (Also tested before the loop.)  */
2262                 return shifted;
2263               else if (CONST_INT_P (op1))
2264                 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2265                                         - INTVAL (op1));
2266               else
2267                 {
2268                   other_amount
2269                     = simplify_gen_unary (NEG, GET_MODE (op1),
2270                                           op1, GET_MODE (op1));
2271                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2272                   other_amount
2273                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2274                                            gen_int_mode (mask, GET_MODE (op1)));
2275                 }
2276
2277               shifted = force_reg (mode, shifted);
2278               /* Both halves are logical shifts of the same register.  */
2279               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2280                                      mode, shifted, new_amount, 0, 1);
2281               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2282                                       mode, shifted, other_amount,
2283                                       subtarget, 1);
2284               return expand_binop (mode, ior_optab, temp, temp1, target,
2285                                    unsignedp, methods);
2286             }
2287
2288           temp = expand_binop (mode,
2289                                left ? lrotate_optab : rrotate_optab,
2290                                shifted, op1, target, unsignedp, methods);
2291         }
2292       else if (unsignedp)
2293         temp = expand_binop (mode,
2294                              left ? lshift_optab : rshift_uns_optab,
2295                              shifted, op1, target, unsignedp, methods);
2296
2297       /* Do arithmetic shifts.
2298          Also, if we are going to widen the operand, we can just as well
2299          use an arithmetic right-shift instead of a logical one.  */
2300       if (temp == 0 && ! rotate
2301           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2302         {
2303           enum optab_methods methods1 = methods;
2304
2305           /* If trying to widen a log shift to an arithmetic shift,
2306              don't accept an arithmetic shift of the same size.  */
2307           if (unsignedp)
2308             methods1 = OPTAB_MUST_WIDEN;
2309
2310           /* Arithmetic shift */
2311
2312           temp = expand_binop (mode,
2313                                left ? lshift_optab : rshift_arith_optab,
2314                                shifted, op1, target, unsignedp, methods1);
2315         }
2316
2317       /* We used to try extzv here for logical right shifts, but that was
2318          only useful for one machine, the VAX, and caused poor code
2319          generation there for lshrdi3, so the code was deleted and a
2320          define_expand for lshrsi3 was added to vax.md.  */
2321     }
2322   /* Every method failing is treated as an internal error.  */
2323   gcc_assert (temp);
2324   return temp;
2325 }
2326
2327 /* Expand a shift by the host integer AMOUNT.
2328    CODE is the tree code of the shift to expand and MODE is the mode
2329    passed through to expand_shift_1; SHIFTED is the rtx being shifted.
2330    TARGET is a suggestion for where to store the result.
2331    A nonzero UNSIGNEDP requests a logical shift, zero an arithmetic one.
2332    Returns the rtx holding the result.  */
2333
2334 rtx
2335 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2336               int amount, rtx target, int unsignedp)
2337 {
2338   rtx amount_rtx = GEN_INT (amount);
2339   return expand_shift_1 (code, mode, shifted, amount_rtx, target, unsignedp);
2340 }
2341
2342 /* Expand a shift whose count is given by the tree AMOUNT, which is
2343    first turned into an rtx with expand_normal.
2344    CODE is the tree code of the shift to expand and MODE is the mode
2345    passed through to expand_shift_1; SHIFTED is the rtx being shifted.
2346    TARGET is a suggestion for where to store the result; a nonzero
2347    UNSIGNEDP requests a logical shift.  Returns the result rtx.  */
2348
2349 rtx
2350 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2351                        tree amount, rtx target, int unsignedp)
2352 {
2353   rtx amount_rtx = expand_normal (amount);
2354   return expand_shift_1 (code, mode, shifted, amount_rtx, target, unsignedp);
2355 }
2356
2357 \f
2358 /* Kind of fixup that must follow the shift/add sequence of a constant
2359    multiplication.  BASIC_VARIANT means no fixup is needed,
2360    NEGATE_VARIANT means that the result should be negated, and
2361    ADD_VARIANT means that the multiplicand should be added to the result.  */
2362 enum mult_variant {basic_variant, negate_variant, add_variant};
2363 /* Forward declarations of static helpers of this file.  */
2364 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2365                         const struct mult_cost *, machine_mode mode);
2366 static bool choose_mult_variant (machine_mode, HOST_WIDE_INT,
2367                                  struct algorithm *, enum mult_variant *, int);
2368 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2369                               const struct algorithm *, enum mult_variant);
2370 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2371 static rtx extract_high_half (machine_mode, rtx);
2372 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2373 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2374                                        int, int);
2375 /* Compute the best algorithm for multiplying by T and store it in
2376    *ALG_OUT.  The algorithm must cost less than COST_LIMIT.  If
2377    ALG_OUT->cost is not cheaper than COST_LIMIT on return, no algorithm
2378    was found and all other fields of *ALG_OUT are undefined.
2379    MODE is the machine mode of the multiplication.  */
2380
2381 static void
2382 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2383             const struct mult_cost *cost_limit, machine_mode mode)
2384 {
2385   int m;
2386   struct algorithm *alg_in, *best_alg;
2387   struct mult_cost best_cost;
2388   struct mult_cost new_limit;
2389   int op_cost, op_latency;
2390   unsigned HOST_WIDE_INT orig_t = t;
2391   unsigned HOST_WIDE_INT q;
2392   int maxm, hash_index;
2393   bool cache_hit = false;
2394   enum alg_code cache_alg = alg_zero;
2395   bool speed = optimize_insn_for_speed_p ();
2396   machine_mode imode;
2397   struct alg_hash_entry *entry_ptr;
2398
2399   /* Indicate that no algorithm is yet found.  If no algorithm
2400      is found, this value will be returned and indicate failure.  */
2401   alg_out->cost.cost = cost_limit->cost + 1;
2402   alg_out->cost.latency = cost_limit->latency + 1;
2403
2404   if (cost_limit->cost < 0
2405       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2406     return;
2407
2408   /* Be prepared for vector modes.  */
2409   imode = GET_MODE_INNER (mode);
2410   if (imode == VOIDmode)
2411     imode = mode;
2412
2413   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2414
2415   /* Restrict the bits of "t" to the multiplication's mode.  */
2416   t &= GET_MODE_MASK (imode);
2417
2418   /* t == 1 can be done in zero cost.  */
2419   if (t == 1)
2420     {
2421       alg_out->ops = 1;
2422       alg_out->cost.cost = 0;
2423       alg_out->cost.latency = 0;
2424       alg_out->op[0] = alg_m;
2425       return;
2426     }
2427
2428   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2429      fail now.  */
2430   if (t == 0)
2431     {
2432       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2433         return;
2434       else
2435         {
2436           alg_out->ops = 1;
2437           alg_out->cost.cost = zero_cost (speed);
2438           alg_out->cost.latency = zero_cost (speed);
2439           alg_out->op[0] = alg_zero;
2440           return;
2441         }
2442     }
2443
2444   /* We'll be needing a couple extra algorithm structures now.  */
2445
2446   alg_in = XALLOCA (struct algorithm);
2447   best_alg = XALLOCA (struct algorithm);
2448   best_cost = *cost_limit;
2449
2450   /* Compute the hash index.  */
2451   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2452
2453   /* See if we already know what to do for T.  A cache entry is only
2454      usable when T, MODE and SPEED all match and an answer is recorded.  */
2455   entry_ptr = alg_hash_entry_ptr (hash_index);
2456   if (entry_ptr->t == t
2457       && entry_ptr->mode == mode
2458       && entry_ptr->speed == speed
2459       && entry_ptr->alg != alg_unknown)
2460     {
2461       cache_alg = entry_ptr->alg;
2462
2463       if (cache_alg == alg_impossible)
2464         {
2465           /* The cache tells us that it's impossible to synthesize
2466              multiplication by T within entry_ptr->cost.  */
2467           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2468             /* COST_LIMIT is at least as restrictive as the one
2469                recorded in the hash table, in which case we have no
2470                hope of synthesizing a multiplication.  Just
2471                return.  */
2472             return;
2473
2474           /* If we get here, COST_LIMIT is less restrictive than the
2475              one recorded in the hash table, so we may be able to
2476              synthesize a multiplication.  Proceed as if we didn't
2477              have the cache entry.  */
2478         }
2479       else
2480         {
2481           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2482             /* The cached algorithm shows that this multiplication
2483                requires more cost than COST_LIMIT.  Just return.  This
2484                way, we don't clobber this cache entry with
2485                alg_impossible but retain useful information.  */
2486             return;
2487
2488           cache_hit = true;
2489
2490           switch (cache_alg)
2491             {
2492             case alg_shift:
2493               goto do_alg_shift;
2494
2495             case alg_add_t_m2:
2496             case alg_sub_t_m2:
2497               goto do_alg_addsub_t_m2;
2498
2499             case alg_add_factor:
2500             case alg_sub_factor:
2501               goto do_alg_addsub_factor;
2502
2503             case alg_add_t2_m:
2504               goto do_alg_add_t2_m;
2505
2506             case alg_sub_t2_m:
2507               goto do_alg_sub_t2_m;
2508
2509             default:
2510               gcc_unreachable ();
2511             }
2512         }
2513     }
2514
2515   /* If we have a group of zero bits at the low-order part of T, try
2516      multiplying by the remaining bits and then doing a shift.  */
2517
2518   if ((t & 1) == 0)
2519     {
2520     do_alg_shift:
2521       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2522       if (m < maxm)
2523         {
2524           q = t >> m;
2525           /* The function expand_shift will choose between a shift and
2526              a sequence of additions, so the observed cost is given as
2527              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2528           op_cost = m * add_cost (speed, mode);
2529           if (shift_cost (speed, mode, m) < op_cost)
2530             op_cost = shift_cost (speed, mode, m);
2531           new_limit.cost = best_cost.cost - op_cost;
2532           new_limit.latency = best_cost.latency - op_cost;
2533           synth_mult (alg_in, q, &new_limit, mode);
2534
2535           alg_in->cost.cost += op_cost;
2536           alg_in->cost.latency += op_cost;
2537           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2538             {
2539               struct algorithm *x;
2540               best_cost = alg_in->cost;
2541               x = alg_in, alg_in = best_alg, best_alg = x;
2542               best_alg->log[best_alg->ops] = m;
2543               best_alg->op[best_alg->ops] = alg_shift;
2544             }
2545
2546           /* See if treating ORIG_T as a signed number yields a better
2547              sequence.  Try this sequence only for a negative ORIG_T
2548              as it would be useless for a non-negative ORIG_T.  */
2549           if ((HOST_WIDE_INT) orig_t < 0)
2550             {
2551               /* Shift ORIG_T as follows because a right shift of a
2552                  negative-valued signed type is implementation
2553                  defined.  */
2554               q = ~(~orig_t >> m);
2555               /* The function expand_shift will choose between a shift
2556                  and a sequence of additions, so the observed cost is
2557                  given as MIN (m * add_cost(speed, mode),
2558                  shift_cost(speed, mode, m)).  */
2559               op_cost = m * add_cost (speed, mode);
2560               if (shift_cost (speed, mode, m) < op_cost)
2561                 op_cost = shift_cost (speed, mode, m);
2562               new_limit.cost = best_cost.cost - op_cost;
2563               new_limit.latency = best_cost.latency - op_cost;
2564               synth_mult (alg_in, q, &new_limit, mode);
2565
2566               alg_in->cost.cost += op_cost;
2567               alg_in->cost.latency += op_cost;
2568               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2569                 {
2570                   struct algorithm *x;
2571                   best_cost = alg_in->cost;
2572                   x = alg_in, alg_in = best_alg, best_alg = x;
2573                   best_alg->log[best_alg->ops] = m;
2574                   best_alg->op[best_alg->ops] = alg_shift;
2575                 }
2576             }
2577         }
2578       if (cache_hit)
2579         goto done;
2580     }
2581
2582   /* If we have an odd number, add or subtract one.  */
2583   if ((t & 1) != 0)
2584     {
2585       unsigned HOST_WIDE_INT w;
2586
2587     do_alg_addsub_t_m2:
2588       for (w = 1; (w & t) != 0; w <<= 1)
2589         ;
2590       /* If T was -1, then W will be zero after the loop.  This is another
2591          case where T ends with ...111.  Handling this with (T + 1) and
2592          subtract 1 produces slightly better code and results in algorithm
2593          selection much faster than treating it like the ...0111 case
2594          below.  */
2595       if (w == 0
2596           || (w > 2
2597               /* Reject the case where t is 3.
2598                  Thus we prefer addition in that case.  */
2599               && t != 3))
2600         {
2601           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2602
2603           op_cost = add_cost (speed, mode);
2604           new_limit.cost = best_cost.cost - op_cost;
2605           new_limit.latency = best_cost.latency - op_cost;
2606           synth_mult (alg_in, t + 1, &new_limit, mode);
2607
2608           alg_in->cost.cost += op_cost;
2609           alg_in->cost.latency += op_cost;
2610           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2611             {
2612               struct algorithm *x;
2613               best_cost = alg_in->cost;
2614               x = alg_in, alg_in = best_alg, best_alg = x;
2615               best_alg->log[best_alg->ops] = 0;
2616               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2617             }
2618         }
2619       else
2620         {
2621           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2622
2623           op_cost = add_cost (speed, mode);
2624           new_limit.cost = best_cost.cost - op_cost;
2625           new_limit.latency = best_cost.latency - op_cost;
2626           synth_mult (alg_in, t - 1, &new_limit, mode);
2627
2628           alg_in->cost.cost += op_cost;
2629           alg_in->cost.latency += op_cost;
2630           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2631             {
2632               struct algorithm *x;
2633               best_cost = alg_in->cost;
2634               x = alg_in, alg_in = best_alg, best_alg = x;
2635               best_alg->log[best_alg->ops] = 0;
2636               best_alg->op[best_alg->ops] = alg_add_t_m2;
2637             }
2638         }
2639
2640       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2641          quickly with a - a * n for some appropriate constant n.  */
2642       m = exact_log2 (-orig_t + 1);
2643       if (m >= 0 && m < maxm)
2644         {
2645           op_cost = shiftsub1_cost (speed, mode, m);
2646           new_limit.cost = best_cost.cost - op_cost;
2647           new_limit.latency = best_cost.latency - op_cost;
2648           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2649                       &new_limit, mode);
2650
2651           alg_in->cost.cost += op_cost;
2652           alg_in->cost.latency += op_cost;
2653           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2654             {
2655               struct algorithm *x;
2656               best_cost = alg_in->cost;
2657               x = alg_in, alg_in = best_alg, best_alg = x;
2658               best_alg->log[best_alg->ops] = m;
2659               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2660             }
2661         }
2662
2663       if (cache_hit)
2664         goto done;
2665     }
2666
2667   /* Look for factors of t of the form
2668      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2669      If we find such a factor, we can multiply by t using an algorithm that
2670      multiplies by q, shift the result by m and add/subtract it to itself.
2671
2672      We search for large factors first and loop down, even if large factors
2673      are less probable than small; if we find a large factor we will find a
2674      good sequence quickly, and therefore be able to prune (by decreasing
2675      COST_LIMIT) the search.  */
2676
2677  do_alg_addsub_factor:
2678   for (m = floor_log2 (t - 1); m >= 2; m--)
2679     {
2680       unsigned HOST_WIDE_INT d;
2681
2682       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2683       if (t % d == 0 && t > d && m < maxm
2684           && (!cache_hit || cache_alg == alg_add_factor))
2685         {
2686           /* If the target has a cheap shift-and-add instruction use
2687              that in preference to a shift insn followed by an add insn.
2688              Assume that the shift-and-add is "atomic" with a latency
2689              equal to its cost, otherwise assume that on superscalar
2690              hardware the shift may be executed concurrently with the
2691              earlier steps in the algorithm.  */
2692           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2693           if (shiftadd_cost (speed, mode, m) < op_cost)
2694             {
2695               op_cost = shiftadd_cost (speed, mode, m);
2696               op_latency = op_cost;
2697             }
2698           else
2699             op_latency = add_cost (speed, mode);
2700
2701           new_limit.cost = best_cost.cost - op_cost;
2702           new_limit.latency = best_cost.latency - op_latency;
2703           synth_mult (alg_in, t / d, &new_limit, mode);
2704
2705           alg_in->cost.cost += op_cost;
2706           alg_in->cost.latency += op_latency;
2707           if (alg_in->cost.latency < op_cost)
2708             alg_in->cost.latency = op_cost;
2709           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2710             {
2711               struct algorithm *x;
2712               best_cost = alg_in->cost;
2713               x = alg_in, alg_in = best_alg, best_alg = x;
2714               best_alg->log[best_alg->ops] = m;
2715               best_alg->op[best_alg->ops] = alg_add_factor;
2716             }
2717           /* Other factors will have been taken care of in the recursion.  */
2718           break;
2719         }
2720
2721       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2722       if (t % d == 0 && t > d && m < maxm
2723           && (!cache_hit || cache_alg == alg_sub_factor))
2724         {
2725           /* If the target has a cheap shift-and-subtract insn use
2726              that in preference to a shift insn followed by a sub insn.
2727              Assume that the shift-and-sub is "atomic" with a latency
2728              equal to its cost, otherwise assume that on superscalar
2729              hardware the shift may be executed concurrently with the
2730              earlier steps in the algorithm.  */
2731           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2732           if (shiftsub0_cost (speed, mode, m) < op_cost)
2733             {
2734               op_cost = shiftsub0_cost (speed, mode, m);
2735               op_latency = op_cost;
2736             }
2737           else
2738             op_latency = add_cost (speed, mode);
2739
2740           new_limit.cost = best_cost.cost - op_cost;
2741           new_limit.latency = best_cost.latency - op_latency;
2742           synth_mult (alg_in, t / d, &new_limit, mode);
2743
2744           alg_in->cost.cost += op_cost;
2745           alg_in->cost.latency += op_latency;
2746           if (alg_in->cost.latency < op_cost)
2747             alg_in->cost.latency = op_cost;
2748           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2749             {
2750               struct algorithm *x;
2751               best_cost = alg_in->cost;
2752               x = alg_in, alg_in = best_alg, best_alg = x;
2753               best_alg->log[best_alg->ops] = m;
2754               best_alg->op[best_alg->ops] = alg_sub_factor;
2755             }
2756           break;
2757         }
2758     }
2759   if (cache_hit)
2760     goto done;
2761
2762   /* Try shift-and-add (load effective address) instructions,
2763      i.e. do a*3, a*5, a*9.  */
2764   if ((t & 1) != 0)
2765     {
2766     do_alg_add_t2_m:
2767       q = t - 1;
2768       q = q & -q;
2769       m = exact_log2 (q);
2770       if (m >= 0 && m < maxm)
2771         {
2772           op_cost = shiftadd_cost (speed, mode, m);
2773           new_limit.cost = best_cost.cost - op_cost;
2774           new_limit.latency = best_cost.latency - op_cost;
2775           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2776
2777           alg_in->cost.cost += op_cost;
2778           alg_in->cost.latency += op_cost;
2779           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2780             {
2781               struct algorithm *x;
2782               best_cost = alg_in->cost;
2783               x = alg_in, alg_in = best_alg, best_alg = x;
2784               best_alg->log[best_alg->ops] = m;
2785               best_alg->op[best_alg->ops] = alg_add_t2_m;
2786             }
2787         }
2788       if (cache_hit)
2789         goto done;
2790
2791     do_alg_sub_t2_m:
2792       q = t + 1;
2793       q = q & -q;
2794       m = exact_log2 (q);
2795       if (m >= 0 && m < maxm)
2796         {
2797           op_cost = shiftsub0_cost (speed, mode, m);
2798           new_limit.cost = best_cost.cost - op_cost;
2799           new_limit.latency = best_cost.latency - op_cost;
2800           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2801
2802           alg_in->cost.cost += op_cost;
2803           alg_in->cost.latency += op_cost;
2804           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2805             {
2806               struct algorithm *x;
2807               best_cost = alg_in->cost;
2808               x = alg_in, alg_in = best_alg, best_alg = x;
2809               best_alg->log[best_alg->ops] = m;
2810               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2811             }
2812         }
2813       if (cache_hit)
2814         goto done;
2815     }
2816
2817  done:
2818   /* If best_cost has not decreased, we have not found any algorithm.  */
2819   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2820     {
2821       /* We failed to find an algorithm.  Record alg_impossible for
2822          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2823          we are asked to find an algorithm for T within the same or
2824          lower COST_LIMIT, we can immediately return to the
2825          caller.  */
2826       entry_ptr->t = t;
2827       entry_ptr->mode = mode;
2828       entry_ptr->speed = speed;
2829       entry_ptr->alg = alg_impossible;
2830       entry_ptr->cost = *cost_limit;
2831       return;
2832     }
2833
2834   /* Cache the result.  */
2835   if (!cache_hit)
2836     {
2837       entry_ptr->t = t;
2838       entry_ptr->mode = mode;
2839       entry_ptr->speed = speed;
2840       entry_ptr->alg = best_alg->op[best_alg->ops];
2841       entry_ptr->cost.cost = best_cost.cost;
2842       entry_ptr->cost.latency = best_cost.latency;
2843     }
2844
2845   /* If we are getting a too long sequence for `struct algorithm'
2846      to record, make this search fail.  */
2847   if (best_alg->ops == MAX_BITS_PER_WORD)
2848     return;
2849
2850   /* Copy the algorithm from temporary space to the space at alg_out.
2851      We avoid using structure assignment because the majority of
2852      best_alg is normally undefined, and this is a critical function.  */
2853   alg_out->ops = best_alg->ops + 1;
2854   alg_out->cost = best_cost;
2855   memcpy (alg_out->op, best_alg->op,
2856           alg_out->ops * sizeof *alg_out->op);
2857   memcpy (alg_out->log, best_alg->log,
2858           alg_out->ops * sizeof *alg_out->log);
2859 }
2860 \f
2861 /* Pick the cheapest way of multiplying a value of mode MODE by VAL
2862    among three candidates:
2863
2864        - a shift/add sequence for VAL itself
2865        - a shift/add sequence for -VAL, followed by a negation
2866        - a shift/add sequence for VAL - 1, followed by an addition.
2867
2868    Return true if the cheapest candidate costs less than MULT_COST;
2869    *ALG describes the chosen sequence and *VARIANT the final fixup.  */
2870
2871 static bool
2872 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2873                      struct algorithm *alg, enum mult_variant *variant,
2874                      int mult_cost)
2875 {
2876   struct algorithm candidate;
2877   struct mult_cost bound;
2878   int fixup_cost;
2879   bool for_speed = optimize_insn_for_speed_p ();
2880
2881   /* A negative budget can never be met.  */
2882   if (mult_cost < 0)
2883     return false;
2884
2885   /* Clamp MULT_COST to a reasonable upper bound: any constant
2886      multiplication can be performed with less than 2 * bits
2887      additions.  */
2888   int max_cost
2889     = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (for_speed, mode);
2890   mult_cost = MIN (mult_cost, max_cost);
2891
2892   *variant = basic_variant;
2893   bound.cost = mult_cost;
2894   bound.latency = mult_cost;
2895   synth_mult (alg, val, &bound, mode);
2896
2897   /* This works only if the inverted value actually fits in an
2898      `unsigned int' */
2899   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2900     {
2901       fixup_cost = neg_cost (for_speed, mode);
2902       /* Start from the caller's budget and tighten it to the best
2903          sequence found so far when that one is cheaper; either way
2904          leave room for the negation.  */
2905       bound.cost = mult_cost - fixup_cost;
2906       bound.latency = mult_cost - fixup_cost;
2907       if (MULT_COST_LESS (&alg->cost, mult_cost))
2908         {
2909           bound.cost = alg->cost.cost - fixup_cost;
2910           bound.latency = alg->cost.latency - fixup_cost;
2911         }
2912
2913       synth_mult (&candidate, -val, &bound, mode);
2914       candidate.cost.cost += fixup_cost;
2915       candidate.cost.latency += fixup_cost;
2916       if (CHEAPER_MULT_COST (&candidate.cost, &alg->cost))
2917         *alg = candidate, *variant = negate_variant;
2918     }
2919
2920   /* This proves very useful for division-by-constant.  */
2921   fixup_cost = add_cost (for_speed, mode);
2922   /* Same budget computation as above, this time leaving room for
2923      the final addition of the multiplicand.  */
2924   bound.cost = mult_cost - fixup_cost;
2925   bound.latency = mult_cost - fixup_cost;
2926   if (MULT_COST_LESS (&alg->cost, mult_cost))
2927     {
2928       bound.cost = alg->cost.cost - fixup_cost;
2929       bound.latency = alg->cost.latency - fixup_cost;
2930     }
2931
2932
2933   synth_mult (&candidate, val - 1, &bound, mode);
2934   candidate.cost.cost += fixup_cost;
2935   candidate.cost.latency += fixup_cost;
2936   if (CHEAPER_MULT_COST (&candidate.cost, &alg->cost))
2937     *alg = candidate, *variant = add_variant;
2938
2939   return MULT_COST_LESS (&alg->cost, mult_cost);
2940 }
2941
2942 /* A subroutine of expand_mult, used for constant multiplications.
2943    Multiply OP0 by VAL in mode MODE using the shift/add sequence ALG
2944    followed by the fixup VARIANT; store the result in TARGET if
2945    convenient.  VAL itself is only used to assert ALG computes it.  */
2946
2947 static rtx
2948 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
2949                    rtx target, const struct algorithm *alg,
2950                    enum mult_variant variant)
2951 {
2952   /* Unsigned, so that the shifts and subtractions below cannot overflow.  */
2953   unsigned HOST_WIDE_INT val_so_far;
2954   rtx_insn *insn;
2955   rtx accum, tem;
2956   int opno;
2957   machine_mode nmode;
2958
2959   /* Avoid repeated memory references and invalid sharing on SUBREGs.  */
2960   op0 = force_reg (mode, op0);
2961
2962   /* ACCUM starts out either as OP0 or as a zero, depending on
2963      the first operation.  */
2964
2965   if (alg->op[0] == alg_zero)
2966     {
2967       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2968       val_so_far = 0;
2969     }
2970   else if (alg->op[0] == alg_m)
2971     {
2972       accum = copy_to_mode_reg (mode, op0);
2973       val_so_far = 1;
2974     }
2975   else
2976     gcc_unreachable ();
2977
2978   for (opno = 1; opno < alg->ops; opno++)
2979     {
2980       int log = alg->log[opno];
2981       rtx shift_subtarget = optimize ? 0 : accum;
2982       rtx add_target
2983         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2984            && !optimize)
2985           ? target : 0;
2986       rtx accum_target = optimize ? 0 : accum;
2987       rtx accum_inner;
2988
2989       switch (alg->op[opno])
2990         {
2991         case alg_shift:
2992           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2993           /* REG_EQUAL note will be attached to the following insn.  */
2994           emit_move_insn (accum, tem);
2995           val_so_far <<= log;
2996           break;
2997
2998         case alg_add_t_m2:
2999           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3000           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3001                                  add_target ? add_target : accum_target);
3002           val_so_far += (unsigned HOST_WIDE_INT) 1 << log;
3003           break;
3004
3005         case alg_sub_t_m2:
3006           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3007           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3008                                  add_target ? add_target : accum_target);
3009           val_so_far -= (unsigned HOST_WIDE_INT) 1 << log;
3010           break;
3011
3012         case alg_add_t2_m:
3013           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3014                                 log, shift_subtarget, 0);
3015           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3016                                  add_target ? add_target : accum_target);
3017           val_so_far = (val_so_far << log) + 1;
3018           break;
3019
3020         case alg_sub_t2_m:
3021           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3022                                 log, shift_subtarget, 0);
3023           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3024                                  add_target ? add_target : accum_target);
3025           val_so_far = (val_so_far << log) - 1;
3026           break;
3027
3028         case alg_add_factor:
3029           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3030           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3031                                  add_target ? add_target : accum_target);
3032           val_so_far += val_so_far << log;
3033           break;
3034
3035         case alg_sub_factor:
3036           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3037           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3038                                  (add_target
3039                                   ? add_target : (optimize ? 0 : tem)));
3040           val_so_far = (val_so_far << log) - val_so_far;
3041           break;
3042
3043         default:
3044           gcc_unreachable ();
3045         }
3046
3047       if (SCALAR_INT_MODE_P (mode))
3048         {
3049           /* Write a REG_EQUAL note on the last insn so that we can cse
3050              multiplication sequences.  Note that if ACCUM is a SUBREG,
3051              we've set the inner register and must properly indicate that.  */
3052           tem = op0, nmode = mode;
3053           accum_inner = accum;
3054           if (GET_CODE (accum) == SUBREG)
3055             {
3056               accum_inner = SUBREG_REG (accum);
3057               nmode = GET_MODE (accum_inner);
3058               tem = gen_lowpart (nmode, op0);
3059             }
3060
3061           insn = get_last_insn ();
3062           set_dst_reg_note (insn, REG_EQUAL,
3063                             gen_rtx_MULT (nmode, tem,
3064                                           gen_int_mode (val_so_far, nmode)),
3065                             accum_inner);
3066         }
3067     }
3068
3069   if (variant == negate_variant)
3070     {
3071       val_so_far = -val_so_far;
3072       accum = expand_unop (mode, neg_optab, accum, target, 0);
3073     }
3074   else if (variant == add_variant)
3075     {
3076       val_so_far = val_so_far + 1;
3077       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3078     }
3079
3080   /* Compare only the bits of val and val_so_far that are significant
3081      in the result mode, to avoid sign-/zero-extension confusion.  */
3082   nmode = GET_MODE_INNER (mode);
3083   if (nmode == VOIDmode)
3084     nmode = mode;
3085   val &= GET_MODE_MASK (nmode);
3086   val_so_far &= GET_MODE_MASK (nmode);
3087   gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3088
3089   return accum;
3090 }
3091
/* Perform a multiplication and return an rtx for the result.
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
   TARGET is a suggestion for where to store the result (an rtx).
   UNSIGNEDP is forwarded to the shift and multiply expanders; when it is
   zero, flag_trapv is set and MODE is a scalar integer mode, the
   negv/smulv optabs are used and no multiply synthesis is attempted.

   We check specially for a constant integer as OP1.  A constant OP0 is
   swapped into OP1 on entry, so the check covers either operand.  */

rtx
expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
             int unsignedp)
{
  enum mult_variant variant;
  struct algorithm algorithm;
  rtx scalar_op1;
  int max_cost;
  bool speed = optimize_insn_for_speed_p ();
  bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;

  /* Canonicalize so that a constant operand, if any, ends up in OP1.  */
  if (CONSTANT_P (op0))
    std::swap (op0, op1);

  /* For vectors, there are several simplifications that can be made if
     all elements of the vector constant are identical.  SCALAR_OP1 is the
     common element in that case; if the elements differ, none of the
     constant shortcuts below apply and we go straight to the optab.  */
  scalar_op1 = op1;
  if (GET_CODE (op1) == CONST_VECTOR)
    {
      int i, n = CONST_VECTOR_NUNITS (op1);
      scalar_op1 = CONST_VECTOR_ELT (op1, 0);
      for (i = 1; i < n; ++i)
        if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
          goto skip_scalar;
    }

  if (INTEGRAL_MODE_P (mode))
    {
      rtx fake_reg;
      HOST_WIDE_INT coeff;
      bool is_neg;
      int mode_bitsize;

      /* Trivial multipliers: 0, 1 and -1.  */
      if (op1 == CONST0_RTX (mode))
        return op1;
      if (op1 == CONST1_RTX (mode))
        return op0;
      if (op1 == CONSTM1_RTX (mode))
        return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
                            op0, target, 0);

      /* When DO_TRAPV is set, skip the shift/add synthesis and use the
         smulv optab at the end of the function.  */
      if (do_trapv)
        goto skip_synth;

      /* If mode is integer vector mode, check if the backend supports
         vector lshift (by scalar or vector) at all.  If not, we can't use
         synthesized multiply.  */
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
          && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
          && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
        goto skip_synth;

      /* These are the operations that are potentially turned into
         a sequence of shifts and additions.  */
      mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

      /* synth_mult does an `unsigned int' multiply.  As long as the mode is
         less than or equal in size to `unsigned int' this doesn't matter.
         If the mode is larger than `unsigned int', then synth_mult works
         only if the constant value exactly fits in an `unsigned int' without
         any truncation.  This means that multiplying by negative values does
         not work; results are off by 2^32 on a 32 bit machine.  */
      if (CONST_INT_P (scalar_op1))
        {
          coeff = INTVAL (scalar_op1);
          is_neg = coeff < 0;
        }
#if TARGET_SUPPORTS_WIDE_INT
      else if (CONST_WIDE_INT_P (scalar_op1))
#else
      else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
#endif
        {
          /* A constant wider than a HOST_WIDE_INT: only an exact power of
             two is handled here, as a single left shift.  */
          int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
          /* Perfect power of 2 (other than 1, which is handled above).  */
          if (shift > 0)
            return expand_shift (LSHIFT_EXPR, mode, op0,
                                 shift, target, unsignedp);
          else
            goto skip_synth;
        }
      else
        goto skip_synth;

      /* We used to test optimize here, on the grounds that it's better to
         produce a smaller program when -O is not used.  But this causes
         such a terrible slowdown sometimes that it seems better to always
         use synth_mult.  */

      /* Special case powers of two.  A negative COEFF in a mode wider than
         HOST_WIDE_INT is excluded here and handled by the negation path
         below.  */
      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
          && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
        return expand_shift (LSHIFT_EXPR, mode, op0,
                             floor_log2 (coeff), target, unsignedp);

      /* FAKE_REG stands in for OP0 when asking for the cost of a real
         multiply, so that the cost of OP0 itself is not included.  */
      fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);

      /* Attempt to handle multiplication of DImode values by negative
         coefficients, by performing the multiplication by a positive
         multiplier and then inverting the result.  */
      if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
        {
          /* It's safe to use -coeff even for INT_MIN, as the
             result is interpreted as an unsigned coefficient.
             Exclude cost of op0 from max_cost to match the cost
             calculation of the synth_mult.  The final negation is
             charged against the budget up front.  */
          coeff = -(unsigned HOST_WIDE_INT) coeff;
          max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
                      - neg_cost (speed, mode));
          if (max_cost <= 0)
            goto skip_synth;

          /* Special case powers of two.  */
          if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
            {
              rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
                                       floor_log2 (coeff), target, unsignedp);
              return expand_unop (mode, neg_optab, temp, target, 0);
            }

          if (choose_mult_variant (mode, coeff, &algorithm, &variant,
                                   max_cost))
            {
              rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
                                            &algorithm, variant);
              return expand_unop (mode, neg_optab, temp, target, 0);
            }
          goto skip_synth;
        }

      /* Exclude cost of op0 from max_cost to match the cost
         calculation of the synth_mult.  */
      max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
      if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
        return expand_mult_const (mode, op0, coeff, target,
                                  &algorithm, variant);
    }
 skip_synth:

  /* Expand x*2.0 as x+x.  */
  if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
    {
      REAL_VALUE_TYPE d;
      REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);

      if (REAL_VALUES_EQUAL (d, dconst2))
        {
          op0 = force_reg (GET_MODE (op0), op0);
          return expand_binop (mode, add_optab, op0, op0,
                               target, unsignedp, OPTAB_LIB_WIDEN);
        }
    }
 skip_scalar:

  /* This used to use umul_optab if unsigned, but for non-widening multiply
     there is no difference between signed and unsigned.  The expansion is
     asserted to succeed.  */
  op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
                      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
  gcc_assert (op0);
  return op0;
}
3261
3262 /* Return a cost estimate for multiplying a register by the given
3263    COEFFicient in the given MODE and SPEED.  */
3264
3265 int
3266 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3267 {
3268   int max_cost;
3269   struct algorithm algorithm;
3270   enum mult_variant variant;
3271
3272   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3273   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3274   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3275     return algorithm.cost.cost;
3276   else
3277     return max_cost;
3278 }
3279
3280 /* Perform a widening multiplication and return an rtx for the result.
3281    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3282    TARGET is a suggestion for where to store the result (an rtx).
3283    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3284    or smul_widen_optab.
3285
3286    We check specially for a constant integer as OP1, comparing the
3287    cost of a widening multiply against the cost of a sequence of shifts
3288    and adds.  */
3289
3290 rtx
3291 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3292                       int unsignedp, optab this_optab)
3293 {
3294   bool speed = optimize_insn_for_speed_p ();
3295   rtx cop1;
3296
3297   if (CONST_INT_P (op1)
3298       && GET_MODE (op0) != VOIDmode
3299       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3300                                 this_optab == umul_widen_optab))
3301       && CONST_INT_P (cop1)
3302       && (INTVAL (cop1) >= 0
3303           || HWI_COMPUTABLE_MODE_P (mode)))
3304     {
3305       HOST_WIDE_INT coeff = INTVAL (cop1);
3306       int max_cost;
3307       enum mult_variant variant;
3308       struct algorithm algorithm;
3309
3310       if (coeff == 0)
3311         return CONST0_RTX (mode);
3312
3313       /* Special case powers of two.  */
3314       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3315         {
3316           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3317           return expand_shift (LSHIFT_EXPR, mode, op0,
3318                                floor_log2 (coeff), target, unsignedp);
3319         }
3320
3321       /* Exclude cost of op0 from max_cost to match the cost
3322          calculation of the synth_mult.  */
3323       max_cost = mul_widen_cost (speed, mode);
3324       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3325                                max_cost))
3326         {
3327           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3328           return expand_mult_const (mode, op0, coeff, target,
3329                                     &algorithm, variant);
3330         }
3331     }
3332   return expand_binop (mode, this_optab, op0, op1, target,
3333                        unsignedp, OPTAB_LIB_WIDEN);
3334 }
3335 \f
3336 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3337    replace division by D, and put the least significant N bits of the result
3338    in *MULTIPLIER_PTR and return the most significant bit.
3339
3340    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3341    needed precision is in PRECISION (should be <= N).
3342
3343    PRECISION should be as small as possible so this function can choose
3344    multiplier more freely.
3345
3346    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3347    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3348
3349    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3350    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3351
3352 unsigned HOST_WIDE_INT
3353 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3354                    unsigned HOST_WIDE_INT *multiplier_ptr,
3355                    int *post_shift_ptr, int *lgup_ptr)
3356 {
3357   int lgup, post_shift;
3358   int pow, pow2;
3359
3360   /* lgup = ceil(log2(divisor)); */
3361   lgup = ceil_log2 (d);
3362
3363   gcc_assert (lgup <= n);
3364
3365   pow = n + lgup;
3366   pow2 = n + lgup - precision;
3367
3368   /* mlow = 2^(N + lgup)/d */
3369   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3370   wide_int mlow = wi::udiv_trunc (val, d);
3371
3372   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3373   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3374   wide_int mhigh = wi::udiv_trunc (val, d);
3375
3376   /* If precision == N, then mlow, mhigh exceed 2^N
3377      (but they do not exceed 2^(N+1)).  */
3378
3379   /* Reduce to lowest terms.  */
3380   for (post_shift = lgup; post_shift > 0; post_shift--)
3381     {
3382       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3383                                                        HOST_BITS_PER_WIDE_INT);
3384       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3385                                                        HOST_BITS_PER_WIDE_INT);
3386       if (ml_lo >= mh_lo)
3387         break;
3388
3389       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3390       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3391     }
3392
3393   *post_shift_ptr = post_shift;
3394   *lgup_ptr = lgup;
3395   if (n < HOST_BITS_PER_WIDE_INT)
3396     {
3397       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3398       *multiplier_ptr = mhigh.to_uhwi () & mask;
3399       return mhigh.to_uhwi () >= mask;
3400     }
3401   else
3402     {
3403       *multiplier_ptr = mhigh.to_uhwi ();
3404       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3405     }
3406 }
3407
3408 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3409    congruent to 1 (mod 2**N).  */
3410
3411 static unsigned HOST_WIDE_INT
3412 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3413 {
3414   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3415
3416   /* The algorithm notes that the choice y = x satisfies
3417      x*y == 1 mod 2^3, since x is assumed odd.
3418      Each iteration doubles the number of bits of significance in y.  */
3419
3420   unsigned HOST_WIDE_INT mask;
3421   unsigned HOST_WIDE_INT y = x;
3422   int nbit = 3;
3423
3424   mask = (n == HOST_BITS_PER_WIDE_INT
3425           ? ~(unsigned HOST_WIDE_INT) 0
3426           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3427
3428   while (nbit < n)
3429     {
3430       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3431       nbit *= 2;
3432     }
3433   return y;
3434 }
3435
3436 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3437    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3438    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3439    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3440    become signed.
3441
3442    The result is put in TARGET if that is convenient.
3443
3444    MODE is the mode of operation.  */
3445
3446 rtx
3447 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3448                              rtx op1, rtx target, int unsignedp)
3449 {
3450   rtx tem;
3451   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3452
3453   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3454                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3455   tem = expand_and (mode, tem, op1, NULL_RTX);
3456   adj_operand
3457     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3458                      adj_operand);
3459
3460   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3461                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3462   tem = expand_and (mode, tem, op0, NULL_RTX);
3463   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3464                           target);
3465
3466   return target;
3467 }
3468
3469 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3470
3471 static rtx
3472 extract_high_half (machine_mode mode, rtx op)
3473 {
3474   machine_mode wider_mode;
3475
3476   if (mode == word_mode)
3477     return gen_highpart (mode, op);
3478
3479   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3480
3481   wider_mode = GET_MODE_WIDER_MODE (mode);
3482   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3483                      GET_MODE_BITSIZE (mode), 0, 1);
3484   return convert_modes (mode, wider_mode, op, 0);
3485 }
3486
3487 /* Like expmed_mult_highpart, but only consider using a multiplication
3488    optab.  OP1 is an rtx for the constant operand.  */
3489
3490 static rtx
3491 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3492                             rtx target, int unsignedp, int max_cost)
3493 {
3494   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3495   machine_mode wider_mode;
3496   optab moptab;
3497   rtx tem;
3498   int size;
3499   bool speed = optimize_insn_for_speed_p ();
3500
3501   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3502
3503   wider_mode = GET_MODE_WIDER_MODE (mode);
3504   size = GET_MODE_BITSIZE (mode);
3505
3506   /* Firstly, try using a multiplication insn that only generates the needed
3507      high part of the product, and in the sign flavor of unsignedp.  */
3508   if (mul_highpart_cost (speed, mode) < max_cost)
3509     {
3510       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3511       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3512                           unsignedp, OPTAB_DIRECT);
3513       if (tem)
3514         return tem;
3515     }
3516
3517   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3518      Need to adjust the result after the multiplication.  */
3519   if (size - 1 < BITS_PER_WORD
3520       && (mul_highpart_cost (speed, mode)
3521           + 2 * shift_cost (speed, mode, size-1)
3522           + 4 * add_cost (speed, mode) < max_cost))
3523     {
3524       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3525       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3526                           unsignedp, OPTAB_DIRECT);
3527       if (tem)
3528         /* We used the wrong signedness.  Adjust the result.  */
3529         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3530                                             tem, unsignedp);
3531     }
3532
3533   /* Try widening multiplication.  */
3534   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3535   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3536       && mul_widen_cost (speed, wider_mode) < max_cost)
3537     {
3538       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3539                           unsignedp, OPTAB_WIDEN);
3540       if (tem)
3541         return extract_high_half (mode, tem);
3542     }
3543
3544   /* Try widening the mode and perform a non-widening multiplication.  */
3545   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3546       && size - 1 < BITS_PER_WORD
3547       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3548           < max_cost))
3549     {
3550       rtx_insn *insns;
3551       rtx wop0, wop1;
3552
3553       /* We need to widen the operands, for example to ensure the
3554          constant multiplier is correctly sign or zero extended.
3555          Use a sequence to clean-up any instructions emitted by
3556          the conversions if things don't work out.  */
3557       start_sequence ();
3558       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3559       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3560       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3561                           unsignedp, OPTAB_WIDEN);
3562       insns = get_insns ();
3563       end_sequence ();
3564
3565       if (tem)
3566         {
3567           emit_insn (insns);
3568           return extract_high_half (mode, tem);
3569         }
3570     }
3571
3572   /* Try widening multiplication of opposite signedness, and adjust.  */
3573   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3574   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3575       && size - 1 < BITS_PER_WORD
3576       && (mul_widen_cost (speed, wider_mode)
3577           + 2 * shift_cost (speed, mode, size-1)
3578           + 4 * add_cost (speed, mode) < max_cost))
3579     {
3580       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3581                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3582       if (tem != 0)
3583         {
3584           tem = extract_high_half (mode, tem);
3585           /* We used the wrong signedness.  Adjust the result.  */
3586           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3587                                               target, unsignedp);
3588         }
3589     }
3590
3591   return 0;
3592 }
3593
3594 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3595    putting the high half of the result in TARGET if that is convenient,
3596    and return where the result is.  If the operation can not be performed,
3597    0 is returned.
3598
3599    MODE is the mode of operation and result.
3600
3601    UNSIGNEDP nonzero means unsigned multiply.
3602
3603    MAX_COST is the total allowed cost for the expanded RTL.  */
3604
3605 static rtx
3606 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3607                       rtx target, int unsignedp, int max_cost)
3608 {
3609   machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3610   unsigned HOST_WIDE_INT cnst1;
3611   int extra_cost;
3612   bool sign_adjust = false;
3613   enum mult_variant variant;
3614   struct algorithm alg;
3615   rtx tem;
3616   bool speed = optimize_insn_for_speed_p ();
3617
3618   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3619   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3620   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3621
3622   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3623
3624   /* We can't optimize modes wider than BITS_PER_WORD.
3625      ??? We might be able to perform double-word arithmetic if
3626      mode == word_mode, however all the cost calculations in
3627      synth_mult etc. assume single-word operations.  */
3628   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3629     return expmed_mult_highpart_optab (mode, op0, op1, target,
3630                                        unsignedp, max_cost);
3631
3632   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3633
3634   /* Check whether we try to multiply by a negative constant.  */
3635   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3636     {
3637       sign_adjust = true;
3638       extra_cost += add_cost (speed, mode);
3639     }
3640
3641   /* See whether shift/add multiplication is cheap enough.  */
3642   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3643                            max_cost - extra_cost))
3644     {
3645       /* See whether the specialized multiplication optabs are
3646          cheaper than the shift/add version.  */
3647       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3648                                         alg.cost.cost + extra_cost);
3649       if (tem)
3650         return tem;
3651
3652       tem = convert_to_mode (wider_mode, op0, unsignedp);
3653       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3654       tem = extract_high_half (mode, tem);
3655
3656       /* Adjust result for signedness.  */
3657       if (sign_adjust)
3658         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3659
3660       return tem;
3661     }
3662   return expmed_mult_highpart_optab (mode, op0, op1, target,
3663                                      unsignedp, max_cost);
3664 }
3665
3666
3667 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3668
3669 static rtx
3670 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3671 {
3672   rtx result, temp, shift;
3673   rtx_code_label *label;
3674   int logd;
3675   int prec = GET_MODE_PRECISION (mode);
3676
3677   logd = floor_log2 (d);
3678   result = gen_reg_rtx (mode);
3679
3680   /* Avoid conditional branches when they're expensive.  */
3681   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3682       && optimize_insn_for_speed_p ())
3683     {
3684       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3685                                       mode, 0, -1);
3686       if (signmask)
3687         {
3688           HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3689           signmask = force_reg (mode, signmask);
3690           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3691
3692           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3693              which instruction sequence to use.  If logical right shifts
3694              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3695              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3696
3697           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3698           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3699               || (set_src_cost (temp, optimize_insn_for_speed_p ())
3700                   > COSTS_N_INSNS (2)))
3701             {
3702               temp = expand_binop (mode, xor_optab, op0, signmask,
3703                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3704               temp = expand_binop (mode, sub_optab, temp, signmask,
3705                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3706               temp = expand_binop (mode, and_optab, temp,
3707                                    gen_int_mode (masklow, mode),
3708                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3709               temp = expand_binop (mode, xor_optab, temp, signmask,
3710                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3711               temp = expand_binop (mode, sub_optab, temp, signmask,
3712                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3713             }
3714           else
3715             {
3716               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3717                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3718               signmask = force_reg (mode, signmask);
3719
3720               temp = expand_binop (mode, add_optab, op0, signmask,
3721                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3722               temp = expand_binop (mode, and_optab, temp,
3723                                    gen_int_mode (masklow, mode),
3724                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3725               temp = expand_binop (mode, sub_optab, temp, signmask,
3726                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3727             }
3728           return temp;
3729         }
3730     }
3731
3732   /* Mask contains the mode's signbit and the significant bits of the
3733      modulus.  By including the signbit in the operation, many targets
3734      can avoid an explicit compare operation in the following comparison
3735      against zero.  */
3736   wide_int mask = wi::mask (logd, false, prec);
3737   mask = wi::set_bit (mask, prec - 1);
3738
3739   temp = expand_binop (mode, and_optab, op0,
3740                        immed_wide_int_const (mask, mode),
3741                        result, 1, OPTAB_LIB_WIDEN);
3742   if (temp != result)
3743     emit_move_insn (result, temp);
3744
3745   label = gen_label_rtx ();
3746   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3747
3748   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3749                        0, OPTAB_LIB_WIDEN);
3750
3751   mask = wi::mask (logd, true, prec);
3752   temp = expand_binop (mode, ior_optab, temp,
3753                        immed_wide_int_const (mask, mode),
3754                        result, 1, OPTAB_LIB_WIDEN);
3755   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3756                        0, OPTAB_LIB_WIDEN);
3757   if (temp != result)
3758     emit_move_insn (result, temp);
3759   emit_label (label);
3760   return result;
3761 }
3762
3763 /* Expand signed division of OP0 by a power of two D in mode MODE.
3764    This routine is only called for positive values of D.  */
3765
3766 static rtx
3767 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3768 {
3769   rtx temp;
3770   rtx_code_label *label;
3771   int logd;
3772
3773   logd = floor_log2 (d);
3774
3775   if (d == 2
3776       && BRANCH_COST (optimize_insn_for_speed_p (),
3777                       false) >= 1)
3778     {
3779       temp = gen_reg_rtx (mode);
3780       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3781       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3782                            0, OPTAB_LIB_WIDEN);
3783       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3784     }
3785
3786 #ifdef HAVE_conditional_move
3787   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3788       >= 2)
3789     {
3790       rtx temp2;
3791
3792       start_sequence ();
3793       temp2 = copy_to_mode_reg (mode, op0);
3794       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3795                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3796       temp = force_reg (mode, temp);
3797
3798       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3799       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3800                                      mode, temp, temp2, mode, 0);
3801       if (temp2)
3802         {
3803           rtx_insn *seq = get_insns ();
3804           end_sequence ();
3805           emit_insn (seq);
3806           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3807         }
3808       end_sequence ();
3809     }
3810 #endif
3811
3812   if (BRANCH_COST (optimize_insn_for_speed_p (),
3813                    false) >= 2)
3814     {
3815       int ushift = GET_MODE_BITSIZE (mode) - logd;
3816
3817       temp = gen_reg_rtx (mode);
3818       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3819       if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3820           || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3821              > COSTS_N_INSNS (1))
3822         temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3823                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3824       else
3825         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3826                              ushift, NULL_RTX, 1);
3827       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3828                            0, OPTAB_LIB_WIDEN);
3829       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3830     }
3831
3832   label = gen_label_rtx ();
3833   temp = copy_to_mode_reg (mode, op0);
3834   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3835   expand_inc (temp, gen_int_mode (d - 1, mode));
3836   emit_label (label);
3837   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3838 }
3839 \f
3840 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3841    if that is convenient, and returning where the result is.
3842    You may request either the quotient or the remainder as the result;
3843    specify REM_FLAG nonzero to get the remainder.
3844
3845    CODE is the expression code for which kind of division this is;
3846    it controls how rounding is done.  MODE is the machine mode to use.
3847    UNSIGNEDP nonzero means do unsigned division.  */
3848
3849 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3850    and then correct it by or'ing in missing high bits
3851    if result of ANDI is nonzero.
3852    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3853    This could optimize to a bfexts instruction.
3854    But C doesn't use these operations, so their optimizations are
3855    left for later.  */
3856 /* ??? For modulo, we don't actually need the highpart of the first product,
3857    the low part will do nicely.  And for small divisors, the second multiply
3858    can also be a low-part only multiply or even be completely left out.
3859    E.g. to calculate the remainder of a division by 3 with a 32 bit
3860    multiply, multiply with 0x55555556 and extract the upper two bits;
3861    the result is exact for inputs up to 0x1fffffff.
3862    The input range can be reduced by using cross-sum rules.
3863    For odd divisors >= 3, the following table gives right shift counts
3864    so that if a number is shifted by an integer multiple of the given
3865    amount, the remainder stays the same:
3866    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3867    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3868    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3869    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3870    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3871
3872    Cross-sum rules for even numbers can be derived by leaving as many bits
3873    to the right alone as the divisor has zeros to the right.
3874    E.g. if x is an unsigned 32 bit number:
3875    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3876    */
3877
3878 rtx
3879 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3880                rtx op0, rtx op1, rtx target, int unsignedp)
3881 {
3882   machine_mode compute_mode;
3883   rtx tquotient;
3884   rtx quotient = 0, remainder = 0;
3885   rtx_insn *last;
3886   int size;
3887   rtx_insn *insn;
3888   optab optab1, optab2;
3889   int op1_is_constant, op1_is_pow2 = 0;
3890   int max_cost, extra_cost;
3891   static HOST_WIDE_INT last_div_const = 0;
3892   bool speed = optimize_insn_for_speed_p ();
3893
3894   op1_is_constant = CONST_INT_P (op1);
3895   if (op1_is_constant)
3896     {
3897       unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3898       if (unsignedp)
3899         ext_op1 &= GET_MODE_MASK (mode);
3900       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3901                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3902     }
3903
3904   /*
3905      This is the structure of expand_divmod:
3906
3907      First comes code to fix up the operands so we can perform the operations
3908      correctly and efficiently.
3909
3910      Second comes a switch statement with code specific for each rounding mode.
3911      For some special operands this code emits all RTL for the desired
3912      operation, for other cases, it generates only a quotient and stores it in
3913      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3914      to indicate that it has not done anything.
3915
3916      Last comes code that finishes the operation.  If QUOTIENT is set and
3917      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3918      QUOTIENT is not set, it is computed using trunc rounding.
3919
3920      We try to generate special code for division and remainder when OP1 is a
3921      constant.  If |OP1| = 2**n we can use shifts and some other fast
3922      operations.  For other values of OP1, we compute a carefully selected
3923      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3924      by m.
3925
3926      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3927      half of the product.  Different strategies for generating the product are
3928      implemented in expmed_mult_highpart.
3929
3930      If what we actually want is the remainder, we generate that by another
3931      by-constant multiplication and a subtraction.  */
3932
3933   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3934      code below will malfunction if we are, so check here and handle
3935      the special case if so.  */
3936   if (op1 == const1_rtx)
3937     return rem_flag ? const0_rtx : op0;
3938
3939   /* When dividing by -1, we could get an overflow.
3940      negv_optab can handle overflows.  */
3941   if (! unsignedp && op1 == constm1_rtx)
3942     {
3943       if (rem_flag)
3944         return const0_rtx;
3945       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3946                           ? negv_optab : neg_optab, op0, target, 0);
3947     }
3948
3949   if (target
3950       /* Don't use the function value register as a target
3951          since we have to read it as well as write it,
3952          and function-inlining gets confused by this.  */
3953       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3954           /* Don't clobber an operand while doing a multi-step calculation.  */
3955           || ((rem_flag || op1_is_constant)
3956               && (reg_mentioned_p (target, op0)
3957                   || (MEM_P (op0) && MEM_P (target))))
3958           || reg_mentioned_p (target, op1)
3959           || (MEM_P (op1) && MEM_P (target))))
3960     target = 0;
3961
3962   /* Get the mode in which to perform this computation.  Normally it will
3963      be MODE, but sometimes we can't do the desired operation in MODE.
3964      If so, pick a wider mode in which we can do the operation.  Convert
3965      to that mode at the start to avoid repeated conversions.
3966
3967      First see what operations we need.  These depend on the expression
3968      we are evaluating.  (We assume that divxx3 insns exist under the
3969      same conditions that modxx3 insns and that these insns don't normally
3970      fail.  If these assumptions are not correct, we may generate less
3971      efficient code in some cases.)
3972
3973      Then see if we find a mode in which we can open-code that operation
3974      (either a division, modulus, or shift).  Finally, check for the smallest
3975      mode for which we can do the operation with a library call.  */
3976
3977   /* We might want to refine this now that we have division-by-constant
3978      optimization.  Since expmed_mult_highpart tries so many variants, it is
3979      not straightforward to generalize this.  Maybe we should make an array
3980      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3981
3982   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3983             ? (unsignedp ? lshr_optab : ashr_optab)
3984             : (unsignedp ? udiv_optab : sdiv_optab));
3985   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3986             ? optab1
3987             : (unsignedp ? udivmod_optab : sdivmod_optab));
3988
3989   for (compute_mode = mode; compute_mode != VOIDmode;
3990        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3991     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3992         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3993       break;
3994
3995   if (compute_mode == VOIDmode)
3996     for (compute_mode = mode; compute_mode != VOIDmode;
3997          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3998       if (optab_libfunc (optab1, compute_mode)
3999           || optab_libfunc (optab2, compute_mode))
4000         break;
4001
4002   /* If we still couldn't find a mode, use MODE, but expand_binop will
4003      probably die.  */
4004   if (compute_mode == VOIDmode)
4005     compute_mode = mode;
4006
4007   if (target && GET_MODE (target) == compute_mode)
4008     tquotient = target;
4009   else
4010     tquotient = gen_reg_rtx (compute_mode);
4011
4012   size = GET_MODE_BITSIZE (compute_mode);
4013 #if 0
4014   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4015      (mode), and thereby get better code when OP1 is a constant.  Do that
4016      later.  It will require going over all usages of SIZE below.  */
4017   size = GET_MODE_BITSIZE (mode);
4018 #endif
4019
4020   /* For a REM, deduct the multiply and add costs unless the last
4021      divide done was for the same constant.  Then record the constant
4022      of this divide (or 0 if this is a REM or OP1 is not constant).  */
4023   max_cost = (unsignedp
4024               ? udiv_cost (speed, compute_mode)
4025               : sdiv_cost (speed, compute_mode));
4026   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4027                      && INTVAL (op1) == last_div_const))
4028     max_cost -= (mul_cost (speed, compute_mode)
4029                  + add_cost (speed, compute_mode));
4030
4031   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4032
4033   /* Now convert to the best mode to use.  */
4034   if (compute_mode != mode)
4035     {
4036       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4037       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4038
4039       /* convert_modes may have placed op1 into a register, so we
4040          must recompute the following.  */
4041       op1_is_constant = CONST_INT_P (op1);
4042       op1_is_pow2 = (op1_is_constant
4043                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4044                           || (! unsignedp
4045                               && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4046     }
4047
4048   /* If one of the operands is a volatile MEM, copy it into a register.  */
4049
4050   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4051     op0 = force_reg (compute_mode, op0);
4052   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4053     op1 = force_reg (compute_mode, op1);
4054
4055   /* If we need the remainder or if OP1 is constant, we need to
4056      put OP0 in a register in case it has any queued subexpressions.  */
4057   if (rem_flag || op1_is_constant)
4058     op0 = force_reg (compute_mode, op0);
4059
4060   last = get_last_insn ();
4061
4062   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4063   if (unsignedp)
4064     {
4065       if (code == FLOOR_DIV_EXPR)
4066         code = TRUNC_DIV_EXPR;
4067       if (code == FLOOR_MOD_EXPR)
4068         code = TRUNC_MOD_EXPR;
4069       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4070         code = TRUNC_DIV_EXPR;
4071     }
4072
4073   if (op1 != const0_rtx)
4074     switch (code)
4075       {
4076       case TRUNC_MOD_EXPR:
4077       case TRUNC_DIV_EXPR:
4078         if (op1_is_constant)
4079           {
4080             if (unsignedp)
4081               {
4082                 unsigned HOST_WIDE_INT mh, ml;
4083                 int pre_shift, post_shift;
4084                 int dummy;
4085                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4086                                             & GET_MODE_MASK (compute_mode));
4087
4088                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4089                   {
4090                     pre_shift = floor_log2 (d);
4091                     if (rem_flag)
4092                       {
4093                         unsigned HOST_WIDE_INT mask
4094                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4095                         remainder
4096                           = expand_binop (compute_mode, and_optab, op0,
4097                                           gen_int_mode (mask, compute_mode),
4098                                           remainder, 1,
4099                                           OPTAB_LIB_WIDEN);
4100                         if (remainder)
4101                           return gen_lowpart (mode, remainder);
4102                       }
4103                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4104                                              pre_shift, tquotient, 1);
4105                   }
4106                 else if (size <= HOST_BITS_PER_WIDE_INT)
4107                   {
4108                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4109                       {
4110                         /* Most significant bit of divisor is set; emit an scc
4111                            insn.  */
4112                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4113                                                           compute_mode, 1, 1);
4114                       }
4115                     else
4116                       {
4117                         /* Find a suitable multiplier and right shift count
4118                            instead of multiplying with D.  */
4119
4120                         mh = choose_multiplier (d, size, size,
4121                                                 &ml, &post_shift, &dummy);
4122
4123                         /* If the suggested multiplier is more than SIZE bits,
4124                            we can do better for even divisors, using an
4125                            initial right shift.  */
4126                         if (mh != 0 && (d & 1) == 0)
4127                           {
4128                             pre_shift = floor_log2 (d & -d);
4129                             mh = choose_multiplier (d >> pre_shift, size,
4130                                                     size - pre_shift,
4131                                                     &ml, &post_shift, &dummy);
4132                             gcc_assert (!mh);
4133                           }
4134                         else
4135                           pre_shift = 0;
4136
4137                         if (mh != 0)
4138                           {
4139                             rtx t1, t2, t3, t4;
4140
4141                             if (post_shift - 1 >= BITS_PER_WORD)
4142                               goto fail1;
4143
4144                             extra_cost
4145                               = (shift_cost (speed, compute_mode, post_shift - 1)
4146                                  + shift_cost (speed, compute_mode, 1)
4147                                  + 2 * add_cost (speed, compute_mode));
4148                             t1 = expmed_mult_highpart
4149                               (compute_mode, op0,
4150                                gen_int_mode (ml, compute_mode),
4151                                NULL_RTX, 1, max_cost - extra_cost);
4152                             if (t1 == 0)
4153                               goto fail1;
4154                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4155                                                                op0, t1),
4156                                                 NULL_RTX);
4157                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4158                                                t2, 1, NULL_RTX, 1);
4159                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4160                                                               t1, t3),
4161                                                 NULL_RTX);
4162                             quotient = expand_shift
4163                               (RSHIFT_EXPR, compute_mode, t4,
4164                                post_shift - 1, tquotient, 1);
4165                           }
4166                         else
4167                           {
4168                             rtx t1, t2;
4169
4170                             if (pre_shift >= BITS_PER_WORD
4171                                 || post_shift >= BITS_PER_WORD)
4172                               goto fail1;
4173
4174                             t1 = expand_shift
4175                               (RSHIFT_EXPR, compute_mode, op0,
4176                                pre_shift, NULL_RTX, 1);
4177                             extra_cost
4178                               = (shift_cost (speed, compute_mode, pre_shift)
4179                                  + shift_cost (speed, compute_mode, post_shift));
4180                             t2 = expmed_mult_highpart
4181                               (compute_mode, t1,
4182                                gen_int_mode (ml, compute_mode),
4183                                NULL_RTX, 1, max_cost - extra_cost);
4184                             if (t2 == 0)
4185                               goto fail1;
4186                             quotient = expand_shift
4187                               (RSHIFT_EXPR, compute_mode, t2,
4188                                post_shift, tquotient, 1);
4189                           }
4190                       }
4191                   }
4192                 else            /* Too wide mode to use tricky code */
4193                   break;
4194
4195                 insn = get_last_insn ();
4196                 if (insn != last)
4197                   set_dst_reg_note (insn, REG_EQUAL,
4198                                     gen_rtx_UDIV (compute_mode, op0, op1),
4199                                     quotient);
4200               }
4201             else                /* TRUNC_DIV, signed */
4202               {
4203                 unsigned HOST_WIDE_INT ml;
4204                 int lgup, post_shift;
4205                 rtx mlr;
4206                 HOST_WIDE_INT d = INTVAL (op1);
4207                 unsigned HOST_WIDE_INT abs_d;
4208
4209                 /* Since d might be INT_MIN, we have to cast to
4210                    unsigned HOST_WIDE_INT before negating to avoid
4211                    undefined signed overflow.  */
4212                 abs_d = (d >= 0
4213                          ? (unsigned HOST_WIDE_INT) d
4214                          : - (unsigned HOST_WIDE_INT) d);
4215
4216                 /* n rem d = n rem -d */
4217                 if (rem_flag && d < 0)
4218                   {
4219                     d = abs_d;
4220                     op1 = gen_int_mode (abs_d, compute_mode);
4221                   }
4222
4223                 if (d == 1)
4224                   quotient = op0;
4225                 else if (d == -1)
4226                   quotient = expand_unop (compute_mode, neg_optab, op0,
4227                                           tquotient, 0);
4228                 else if (HOST_BITS_PER_WIDE_INT >= size
4229                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4230                   {
4231                     /* This case is not handled correctly below.  */
4232                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4233                                                 compute_mode, 1, 1);
4234                     if (quotient == 0)
4235                       goto fail1;
4236                   }
4237                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4238                          && (rem_flag
4239                              ? smod_pow2_cheap (speed, compute_mode)
4240                              : sdiv_pow2_cheap (speed, compute_mode))
4241                          /* We assume that cheap metric is true if the
4242                             optab has an expander for this mode.  */
4243                          && ((optab_handler ((rem_flag ? smod_optab
4244                                               : sdiv_optab),
4245                                              compute_mode)
4246                               != CODE_FOR_nothing)
4247                              || (optab_handler (sdivmod_optab,
4248                                                 compute_mode)
4249                                  != CODE_FOR_nothing)))
4250                   ;
4251                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4252                   {
4253                     if (rem_flag)
4254                       {
4255                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4256                         if (remainder)
4257                           return gen_lowpart (mode, remainder);
4258                       }
4259
4260                     if (sdiv_pow2_cheap (speed, compute_mode)
4261                         && ((optab_handler (sdiv_optab, compute_mode)
4262                              != CODE_FOR_nothing)
4263                             || (optab_handler (sdivmod_optab, compute_mode)
4264                                 != CODE_FOR_nothing)))
4265                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4266                                                 compute_mode, op0,
4267                                                 gen_int_mode (abs_d,
4268                                                               compute_mode),
4269                                                 NULL_RTX, 0);
4270                     else
4271                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4272
4273                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4274                        negate the quotient.  */
4275                     if (d < 0)
4276                       {
4277                         insn = get_last_insn ();
4278                         if (insn != last
4279                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4280                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4281                           set_dst_reg_note (insn, REG_EQUAL,
4282                                             gen_rtx_DIV (compute_mode, op0,
4283                                                          gen_int_mode
4284                                                            (abs_d,
4285                                                             compute_mode)),
4286                                             quotient);
4287
4288                         quotient = expand_unop (compute_mode, neg_optab,
4289                                                 quotient, quotient, 0);
4290                       }
4291                   }
4292                 else if (size <= HOST_BITS_PER_WIDE_INT)
4293                   {
4294                     choose_multiplier (abs_d, size, size - 1,
4295                                        &ml, &post_shift, &lgup);
4296                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4297                       {
4298                         rtx t1, t2, t3;
4299
4300                         if (post_shift >= BITS_PER_WORD
4301                             || size - 1 >= BITS_PER_WORD)
4302                           goto fail1;
4303
4304                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4305                                       + shift_cost (speed, compute_mode, size - 1)
4306                                       + add_cost (speed, compute_mode));
4307                         t1 = expmed_mult_highpart
4308                           (compute_mode, op0, gen_int_mode (ml, compute_mode),
4309                            NULL_RTX, 0, max_cost - extra_cost);
4310                         if (t1 == 0)
4311                           goto fail1;
4312                         t2 = expand_shift
4313                           (RSHIFT_EXPR, compute_mode, t1,
4314                            post_shift, NULL_RTX, 0);
4315                         t3 = expand_shift
4316                           (RSHIFT_EXPR, compute_mode, op0,
4317                            size - 1, NULL_RTX, 0);
4318                         if (d < 0)
4319                           quotient
4320                             = force_operand (gen_rtx_MINUS (compute_mode,
4321                                                             t3, t2),
4322                                              tquotient);
4323                         else
4324                           quotient
4325                             = force_operand (gen_rtx_MINUS (compute_mode,
4326                                                             t2, t3),
4327                                              tquotient);
4328                       }
4329                     else
4330                       {
4331                         rtx t1, t2, t3, t4;
4332
4333                         if (post_shift >= BITS_PER_WORD
4334                             || size - 1 >= BITS_PER_WORD)
4335                           goto fail1;
4336
4337                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4338                         mlr = gen_int_mode (ml, compute_mode);
4339                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4340                                       + shift_cost (speed, compute_mode, size - 1)
4341                                       + 2 * add_cost (speed, compute_mode));
4342                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4343                                                    NULL_RTX, 0,
4344                                                    max_cost - extra_cost);
4345                         if (t1 == 0)
4346                           goto fail1;
4347                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4348                                                           t1, op0),
4349                                             NULL_RTX);
4350                         t3 = expand_shift
4351                           (RSHIFT_EXPR, compute_mode, t2,
4352                            post_shift, NULL_RTX, 0);
4353                         t4 = expand_shift
4354                           (RSHIFT_EXPR, compute_mode, op0,
4355                            size - 1, NULL_RTX, 0);
4356                         if (d < 0)
4357                           quotient
4358                             = force_operand (gen_rtx_MINUS (compute_mode,
4359                                                             t4, t3),
4360                                              tquotient);
4361                         else
4362                           quotient
4363                             = force_operand (gen_rtx_MINUS (compute_mode,
4364                                                             t3, t4),
4365                                              tquotient);
4366                       }
4367                   }
4368                 else            /* Too wide mode to use tricky code */
4369                   break;
4370
4371                 insn = get_last_insn ();
4372                 if (insn != last)
4373                   set_dst_reg_note (insn, REG_EQUAL,
4374                                     gen_rtx_DIV (compute_mode, op0, op1),
4375                                     quotient);
4376               }
4377             break;
4378           }
4379       fail1:
4380         delete_insns_since (last);
4381         break;
4382
4383       case FLOOR_DIV_EXPR:
4384       case FLOOR_MOD_EXPR:
4385       /* We will come here only for signed operations.  */
4386         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4387           {
4388             unsigned HOST_WIDE_INT mh, ml;
4389             int pre_shift, lgup, post_shift;
4390             HOST_WIDE_INT d = INTVAL (op1);
4391
4392             if (d > 0)
4393               {
4394                 /* We could just as easily deal with negative constants here,
4395                    but it does not seem worth the trouble for GCC 2.6.  */
4396                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4397                   {
4398                     pre_shift = floor_log2 (d);
4399                     if (rem_flag)
4400                       {
4401                         unsigned HOST_WIDE_INT mask
4402                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4403                         remainder = expand_binop
4404                           (compute_mode, and_optab, op0,
4405                            gen_int_mode (mask, compute_mode),
4406                            remainder, 0, OPTAB_LIB_WIDEN);
4407                         if (remainder)
4408                           return gen_lowpart (mode, remainder);
4409                       }
4410                     quotient = expand_shift
4411                       (RSHIFT_EXPR, compute_mode, op0,
4412                        pre_shift, tquotient, 0);
4413                   }
4414                 else
4415                   {
4416                     rtx t1, t2, t3, t4;
4417
4418                     mh = choose_multiplier (d, size, size - 1,
4419                                             &ml, &post_shift, &lgup);
4420                     gcc_assert (!mh);
4421
4422                     if (post_shift < BITS_PER_WORD
4423                         && size - 1 < BITS_PER_WORD)
4424                       {
4425                         t1 = expand_shift
4426                           (RSHIFT_EXPR, compute_mode, op0,
4427                            size - 1, NULL_RTX, 0);
4428                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4429                                            NULL_RTX, 0, OPTAB_WIDEN);
4430                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4431                                       + shift_cost (speed, compute_mode, size - 1)
4432                                       + 2 * add_cost (speed, compute_mode));
4433                         t3 = expmed_mult_highpart
4434                           (compute_mode, t2, gen_int_mode (ml, compute_mode),
4435                            NULL_RTX, 1, max_cost - extra_cost);
4436                         if (t3 != 0)
4437                           {
4438                             t4 = expand_shift
4439                               (RSHIFT_EXPR, compute_mode, t3,
4440                                post_shift, NULL_RTX, 1);
4441                             quotient = expand_binop (compute_mode, xor_optab,
4442                                                      t4, t1, tquotient, 0,
4443                                                      OPTAB_WIDEN);
4444                           }
4445                       }
4446                   }
4447               }
4448             else
4449               {
4450                 rtx nsign, t1, t2, t3, t4;
4451                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4452                                                   op0, constm1_rtx), NULL_RTX);
4453                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4454                                    0, OPTAB_WIDEN);
4455                 nsign = expand_shift
4456                   (RSHIFT_EXPR, compute_mode, t2,
4457                    size - 1, NULL_RTX, 0);
4458                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4459                                     NULL_RTX);
4460                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4461                                     NULL_RTX, 0);
4462                 if (t4)
4463                   {
4464                     rtx t5;
4465                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4466                                       NULL_RTX, 0);
4467                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4468                                                             t4, t5),
4469                                               tquotient);
4470                   }
4471               }
4472           }
4473
4474         if (quotient != 0)
4475           break;
4476         delete_insns_since (last);
4477
4478         /* Try using an instruction that produces both the quotient and
4479            remainder, using truncation.  We can easily compensate the quotient
4480            or remainder to get floor rounding, once we have the remainder.
4481            Notice that we compute also the final remainder value here,
4482            and return the result right away.  */
4483         if (target == 0 || GET_MODE (target) != compute_mode)
4484           target = gen_reg_rtx (compute_mode);
4485
4486         if (rem_flag)
4487           {
4488             remainder
4489               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4490             quotient = gen_reg_rtx (compute_mode);
4491           }
4492         else
4493           {
4494             quotient
4495               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4496             remainder = gen_reg_rtx (compute_mode);
4497           }
4498
4499         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4500                                  quotient, remainder, 0))
4501           {
4502             /* This could be computed with a branch-less sequence.
4503                Save that for later.  */
4504             rtx tem;
4505             rtx_code_label *label = gen_label_rtx ();
4506             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4507             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4508                                 NULL_RTX, 0, OPTAB_WIDEN);
4509             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4510             expand_dec (quotient, const1_rtx);
4511             expand_inc (remainder, op1);
4512             emit_label (label);
4513             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4514           }
4515
4516         /* No luck with division elimination or divmod.  Have to do it
4517            by conditionally adjusting op0 *and* the result.  */
4518         {
4519           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4520           rtx adjusted_op0;
4521           rtx tem;
4522
4523           quotient = gen_reg_rtx (compute_mode);
4524           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4525           label1 = gen_label_rtx ();
4526           label2 = gen_label_rtx ();
4527           label3 = gen_label_rtx ();
4528           label4 = gen_label_rtx ();
4529           label5 = gen_label_rtx ();
4530           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4531           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4532           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4533                               quotient, 0, OPTAB_LIB_WIDEN);
4534           if (tem != quotient)
4535             emit_move_insn (quotient, tem);
4536           emit_jump_insn (gen_jump (label5));
4537           emit_barrier ();
4538           emit_label (label1);
4539           expand_inc (adjusted_op0, const1_rtx);
4540           emit_jump_insn (gen_jump (label4));
4541           emit_barrier ();
4542           emit_label (label2);
4543           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4544           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4545                               quotient, 0, OPTAB_LIB_WIDEN);
4546           if (tem != quotient)
4547             emit_move_insn (quotient, tem);
4548           emit_jump_insn (gen_jump (label5));
4549           emit_barrier ();
4550           emit_label (label3);
4551           expand_dec (adjusted_op0, const1_rtx);
4552           emit_label (label4);
4553           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4554                               quotient, 0, OPTAB_LIB_WIDEN);
4555           if (tem != quotient)
4556             emit_move_insn (quotient, tem);
4557           expand_dec (quotient, const1_rtx);
4558           emit_label (label5);
4559         }
4560         break;
4561
4562       case CEIL_DIV_EXPR:
4563       case CEIL_MOD_EXPR:
4564         if (unsignedp)
4565           {
4566             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4567               {
4568                 rtx t1, t2, t3;
4569                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4570                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4571                                    floor_log2 (d), tquotient, 1);
4572                 t2 = expand_binop (compute_mode, and_optab, op0,
4573                                    gen_int_mode (d - 1, compute_mode),
4574                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4575                 t3 = gen_reg_rtx (compute_mode);
4576                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4577                                       compute_mode, 1, 1);
4578                 if (t3 == 0)
4579                   {
4580                     rtx_code_label *lab;
4581                     lab = gen_label_rtx ();
4582                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4583                     expand_inc (t1, const1_rtx);
4584                     emit_label (lab);
4585                     quotient = t1;
4586                   }
4587                 else
4588                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4589                                                           t1, t3),
4590                                             tquotient);
4591                 break;
4592               }
4593
4594             /* Try using an instruction that produces both the quotient and
4595                remainder, using truncation.  We can easily compensate the
4596                quotient or remainder to get ceiling rounding, once we have the
4597                remainder.  Notice that we compute also the final remainder
4598                value here, and return the result right away.  */
4599             if (target == 0 || GET_MODE (target) != compute_mode)
4600               target = gen_reg_rtx (compute_mode);
4601
4602             if (rem_flag)
4603               {
4604                 remainder = (REG_P (target)
4605                              ? target : gen_reg_rtx (compute_mode));
4606                 quotient = gen_reg_rtx (compute_mode);
4607               }
4608             else
4609               {
4610                 quotient = (REG_P (target)
4611                             ? target : gen_reg_rtx (compute_mode));
4612                 remainder = gen_reg_rtx (compute_mode);
4613               }
4614
4615             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4616                                      remainder, 1))
4617               {
4618                 /* This could be computed with a branch-less sequence.
4619                    Save that for later.  */
4620                 rtx_code_label *label = gen_label_rtx ();
4621                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4622                                  compute_mode, label);
4623                 expand_inc (quotient, const1_rtx);
4624                 expand_dec (remainder, op1);
4625                 emit_label (label);
4626                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4627               }
4628
4629             /* No luck with division elimination or divmod.  Have to do it
4630                by conditionally adjusting op0 *and* the result.  */
4631             {
4632               rtx_code_label *label1, *label2;
4633               rtx adjusted_op0, tem;
4634
4635               quotient = gen_reg_rtx (compute_mode);
4636               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4637               label1 = gen_label_rtx ();
4638               label2 = gen_label_rtx ();
4639               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4640                                compute_mode, label1);
4641               emit_move_insn  (quotient, const0_rtx);
4642               emit_jump_insn (gen_jump (label2));
4643               emit_barrier ();
4644               emit_label (label1);
4645               expand_dec (adjusted_op0, const1_rtx);
4646               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4647                                   quotient, 1, OPTAB_LIB_WIDEN);
4648               if (tem != quotient)
4649                 emit_move_insn (quotient, tem);
4650               expand_inc (quotient, const1_rtx);
4651               emit_label (label2);
4652             }
4653           }
4654         else /* signed */
4655           {
4656             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4657                 && INTVAL (op1) >= 0)
4658               {
4659                 /* This is extremely similar to the code for the unsigned case
4660                    above.  For 2.7 we should merge these variants, but for
4661                    2.6.1 I don't want to touch the code for unsigned since that
4662                    get used in C.  The signed case will only be used by other
4663                    languages (Ada).  */
4664
4665                 rtx t1, t2, t3;
4666                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4667                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4668                                    floor_log2 (d), tquotient, 0);
4669                 t2 = expand_binop (compute_mode, and_optab, op0,
4670                                    gen_int_mode (d - 1, compute_mode),
4671                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4672                 t3 = gen_reg_rtx (compute_mode);
4673                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4674                                       compute_mode, 1, 1);
4675                 if (t3 == 0)
4676                   {
4677                     rtx_code_label *lab;
4678                     lab = gen_label_rtx ();
4679                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4680                     expand_inc (t1, const1_rtx);
4681                     emit_label (lab);
4682                     quotient = t1;
4683                   }
4684                 else
4685                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4686                                                           t1, t3),
4687                                             tquotient);
4688                 break;
4689               }
4690
4691             /* Try using an instruction that produces both the quotient and
4692                remainder, using truncation.  We can easily compensate the
4693                quotient or remainder to get ceiling rounding, once we have the
4694                remainder.  Notice that we compute also the final remainder
4695                value here, and return the result right away.  */
4696             if (target == 0 || GET_MODE (target) != compute_mode)
4697               target = gen_reg_rtx (compute_mode);
4698             if (rem_flag)
4699               {
4700                 remainder= (REG_P (target)
4701                             ? target : gen_reg_rtx (compute_mode));
4702                 quotient = gen_reg_rtx (compute_mode);
4703               }
4704             else
4705               {
4706                 quotient = (REG_P (target)
4707                             ? target : gen_reg_rtx (compute_mode));
4708                 remainder = gen_reg_rtx (compute_mode);
4709               }
4710
4711             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4712                                      remainder, 0))
4713               {
4714                 /* This could be computed with a branch-less sequence.
4715                    Save that for later.  */
4716                 rtx tem;
4717                 rtx_code_label *label = gen_label_rtx ();
4718                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4719                                  compute_mode, label);
4720                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4721                                     NULL_RTX, 0, OPTAB_WIDEN);
4722                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4723                 expand_inc (quotient, const1_rtx);
4724                 expand_dec (remainder, op1);
4725                 emit_label (label);
4726                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4727               }
4728
4729             /* No luck with division elimination or divmod.  Have to do it
4730                by conditionally adjusting op0 *and* the result.  */
4731             {
4732               rtx_code_label *label1, *label2, *label3, *label4, *label5;
4733               rtx adjusted_op0;
4734               rtx tem;
4735
4736               quotient = gen_reg_rtx (compute_mode);
4737               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4738               label1 = gen_label_rtx ();
4739               label2 = gen_label_rtx ();
4740               label3 = gen_label_rtx ();
4741               label4 = gen_label_rtx ();
4742               label5 = gen_label_rtx ();
4743               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4744               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4745                                compute_mode, label1);
4746               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4747                                   quotient, 0, OPTAB_LIB_WIDEN);
4748               if (tem != quotient)
4749                 emit_move_insn (quotient, tem);
4750               emit_jump_insn (gen_jump (label5));
4751               emit_barrier ();
4752               emit_label (label1);
4753               expand_dec (adjusted_op0, const1_rtx);
4754               emit_jump_insn (gen_jump (label4));
4755               emit_barrier ();
4756               emit_label (label2);
4757               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4758                                compute_mode, label3);
4759               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4760                                   quotient, 0, OPTAB_LIB_WIDEN);
4761               if (tem != quotient)
4762                 emit_move_insn (quotient, tem);
4763               emit_jump_insn (gen_jump (label5));
4764               emit_barrier ();
4765               emit_label (label3);
4766               expand_inc (adjusted_op0, const1_rtx);
4767               emit_label (label4);
4768               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4769                                   quotient, 0, OPTAB_LIB_WIDEN);
4770               if (tem != quotient)
4771                 emit_move_insn (quotient, tem);
4772               expand_inc (quotient, const1_rtx);
4773               emit_label (label5);
4774             }
4775           }
4776         break;
4777
4778       case EXACT_DIV_EXPR:
4779         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4780           {
4781             HOST_WIDE_INT d = INTVAL (op1);
4782             unsigned HOST_WIDE_INT ml;
4783             int pre_shift;
4784             rtx t1;
4785
4786             pre_shift = floor_log2 (d & -d);
4787             ml = invert_mod2n (d >> pre_shift, size);
4788             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4789                                pre_shift, NULL_RTX, unsignedp);
4790             quotient = expand_mult (compute_mode, t1,
4791                                     gen_int_mode (ml, compute_mode),
4792                                     NULL_RTX, 1);
4793
4794             insn = get_last_insn ();
4795             set_dst_reg_note (insn, REG_EQUAL,
4796                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4797                                               compute_mode, op0, op1),
4798                               quotient);
4799           }
4800         break;
4801
4802       case ROUND_DIV_EXPR:
4803       case ROUND_MOD_EXPR:
4804         if (unsignedp)
4805           {
4806             rtx tem;
4807             rtx_code_label *label;
4808             label = gen_label_rtx ();
4809             quotient = gen_reg_rtx (compute_mode);
4810             remainder = gen_reg_rtx (compute_mode);
4811             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4812               {
4813                 rtx tem;
4814                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4815                                          quotient, 1, OPTAB_LIB_WIDEN);
4816                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4817                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4818                                           remainder, 1, OPTAB_LIB_WIDEN);
4819               }
4820             tem = plus_constant (compute_mode, op1, -1);
4821             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4822             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4823             expand_inc (quotient, const1_rtx);
4824             expand_dec (remainder, op1);
4825             emit_label (label);
4826           }
4827         else
4828           {
4829             rtx abs_rem, abs_op1, tem, mask;
4830             rtx_code_label *label;
4831             label = gen_label_rtx ();
4832             quotient = gen_reg_rtx (compute_mode);
4833             remainder = gen_reg_rtx (compute_mode);
4834             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4835               {
4836                 rtx tem;
4837                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4838                                          quotient, 0, OPTAB_LIB_WIDEN);
4839                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4840                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4841                                           remainder, 0, OPTAB_LIB_WIDEN);
4842               }
4843             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4844             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4845             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4846                                 1, NULL_RTX, 1);
4847             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4848             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4849                                 NULL_RTX, 0, OPTAB_WIDEN);
4850             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4851                                  size - 1, NULL_RTX, 0);
4852             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4853                                 NULL_RTX, 0, OPTAB_WIDEN);
4854             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4855                                 NULL_RTX, 0, OPTAB_WIDEN);
4856             expand_inc (quotient, tem);
4857             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4858                                 NULL_RTX, 0, OPTAB_WIDEN);
4859             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4860                                 NULL_RTX, 0, OPTAB_WIDEN);
4861             expand_dec (remainder, tem);
4862             emit_label (label);
4863           }
4864         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4865
4866       default:
4867         gcc_unreachable ();
4868       }
4869
4870   if (quotient == 0)
4871     {
4872       if (target && GET_MODE (target) != compute_mode)
4873         target = 0;
4874
4875       if (rem_flag)
4876         {
4877           /* Try to produce the remainder without producing the quotient.
4878              If we seem to have a divmod pattern that does not require widening,
4879              don't try widening here.  We should really have a WIDEN argument
4880              to expand_twoval_binop, since what we'd really like to do here is
4881              1) try a mod insn in compute_mode
4882              2) try a divmod insn in compute_mode
4883              3) try a div insn in compute_mode and multiply-subtract to get
4884                 remainder
4885              4) try the same things with widening allowed.  */
4886           remainder
4887             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4888                                  op0, op1, target,
4889                                  unsignedp,
4890                                  ((optab_handler (optab2, compute_mode)
4891                                    != CODE_FOR_nothing)
4892                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4893           if (remainder == 0)
4894             {
4895               /* No luck there.  Can we do remainder and divide at once
4896                  without a library call?  */
4897               remainder = gen_reg_rtx (compute_mode);
4898               if (! expand_twoval_binop ((unsignedp
4899                                           ? udivmod_optab
4900                                           : sdivmod_optab),
4901                                          op0, op1,
4902                                          NULL_RTX, remainder, unsignedp))
4903                 remainder = 0;
4904             }
4905
4906           if (remainder)
4907             return gen_lowpart (mode, remainder);
4908         }
4909
4910       /* Produce the quotient.  Try a quotient insn, but not a library call.
4911          If we have a divmod in this mode, use it in preference to widening
4912          the div (for this test we assume it will not fail). Note that optab2
4913          is set to the one of the two optabs that the call below will use.  */
4914       quotient
4915         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4916                              op0, op1, rem_flag ? NULL_RTX : target,
4917                              unsignedp,
4918                              ((optab_handler (optab2, compute_mode)
4919                                != CODE_FOR_nothing)
4920                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4921
4922       if (quotient == 0)
4923         {
4924           /* No luck there.  Try a quotient-and-remainder insn,
4925              keeping the quotient alone.  */
4926           quotient = gen_reg_rtx (compute_mode);
4927           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4928                                      op0, op1,
4929                                      quotient, NULL_RTX, unsignedp))
4930             {
4931               quotient = 0;
4932               if (! rem_flag)
4933                 /* Still no luck.  If we are not computing the remainder,
4934                    use a library call for the quotient.  */
4935                 quotient = sign_expand_binop (compute_mode,
4936                                               udiv_optab, sdiv_optab,
4937                                               op0, op1, target,
4938                                               unsignedp, OPTAB_LIB_WIDEN);
4939             }
4940         }
4941     }
4942
4943   if (rem_flag)
4944     {
4945       if (target && GET_MODE (target) != compute_mode)
4946         target = 0;
4947
4948       if (quotient == 0)
4949         {
4950           /* No divide instruction either.  Use library for remainder.  */
4951           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4952                                          op0, op1, target,
4953                                          unsignedp, OPTAB_LIB_WIDEN);
4954           /* No remainder function.  Try a quotient-and-remainder
4955              function, keeping the remainder.  */
4956           if (!remainder)
4957             {
4958               remainder = gen_reg_rtx (compute_mode);
4959               if (!expand_twoval_binop_libfunc
4960                   (unsignedp ? udivmod_optab : sdivmod_optab,
4961                    op0, op1,
4962                    NULL_RTX, remainder,
4963                    unsignedp ? UMOD : MOD))
4964                 remainder = NULL_RTX;
4965             }
4966         }
4967       else
4968         {
4969           /* We divided.  Now finish doing X - Y * (X / Y).  */
4970           remainder = expand_mult (compute_mode, quotient, op1,
4971                                    NULL_RTX, unsignedp);
4972           remainder = expand_binop (compute_mode, sub_optab, op0,
4973                                     remainder, target, unsignedp,
4974                                     OPTAB_LIB_WIDEN);
4975         }
4976     }
4977
4978   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4979 }
4980 \f
4981 /* Return a tree node with data type TYPE, describing the value of X.
4982    Usually this is an VAR_DECL, if there is no obvious better choice.
4983    X may be an expression, however we only support those expressions
4984    generated by loop.c.  */
4985
4986 tree
4987 make_tree (tree type, rtx x)
4988 {
4989   tree t;
4990
4991   switch (GET_CODE (x))
4992     {
4993     case CONST_INT:
4994     case CONST_WIDE_INT:
4995       t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
4996       return t;
4997
4998     case CONST_DOUBLE:
4999       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5000       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5001         t = wide_int_to_tree (type,
5002                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5003                                                     HOST_BITS_PER_WIDE_INT * 2));
5004       else
5005         {
5006           REAL_VALUE_TYPE d;
5007
5008           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5009           t = build_real (type, d);
5010         }
5011
5012       return t;
5013
5014     case CONST_VECTOR:
5015       {
5016         int units = CONST_VECTOR_NUNITS (x);
5017         tree itype = TREE_TYPE (type);
5018         tree *elts;
5019         int i;
5020
5021         /* Build a tree with vector elements.  */
5022         elts = XALLOCAVEC (tree, units);
5023         for (i = units - 1; i >= 0; --i)
5024           {
5025             rtx elt = CONST_VECTOR_ELT (x, i);
5026             elts[i] = make_tree (itype, elt);
5027           }
5028
5029         return build_vector (type, elts);
5030       }
5031
5032     case PLUS:
5033       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5034                           make_tree (type, XEXP (x, 1)));
5035
5036     case MINUS:
5037       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5038                           make_tree (type, XEXP (x, 1)));
5039
5040     case NEG:
5041       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5042
5043     case MULT:
5044       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5045                           make_tree (type, XEXP (x, 1)));
5046
5047     case ASHIFT:
5048       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5049                           make_tree (type, XEXP (x, 1)));
5050
5051     case LSHIFTRT:
5052       t = unsigned_type_for (type);
5053       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5054                                          make_tree (t, XEXP (x, 0)),
5055                                          make_tree (type, XEXP (x, 1))));
5056
5057     case ASHIFTRT:
5058       t = signed_type_for (type);
5059       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5060                                          make_tree (t, XEXP (x, 0)),
5061                                          make_tree (type, XEXP (x, 1))));
5062
5063     case DIV:
5064       if (TREE_CODE (type) != REAL_TYPE)
5065         t = signed_type_for (type);
5066       else
5067         t = type;
5068
5069       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5070                                          make_tree (t, XEXP (x, 0)),
5071                                          make_tree (t, XEXP (x, 1))));
5072     case UDIV:
5073       t = unsigned_type_for (type);
5074       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5075                                          make_tree (t, XEXP (x, 0)),
5076                                          make_tree (t, XEXP (x, 1))));
5077
5078     case SIGN_EXTEND:
5079     case ZERO_EXTEND:
5080       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5081                                           GET_CODE (x) == ZERO_EXTEND);
5082       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5083
5084     case CONST:
5085       return make_tree (type, XEXP (x, 0));
5086
5087     case SYMBOL_REF:
5088       t = SYMBOL_REF_DECL (x);
5089       if (t)
5090         return fold_convert (type, build_fold_addr_expr (t));
5091       /* else fall through.  */
5092
5093     default:
5094       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5095
5096       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5097          address mode to pointer mode.  */
5098       if (POINTER_TYPE_P (type))
5099         x = convert_memory_address_addr_space
5100               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5101
5102       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5103          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5104       t->decl_with_rtl.rtl = x;
5105
5106       return t;
5107     }
5108 }
5109 \f
5110 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5111    and returning TARGET.
5112
5113    If TARGET is 0, a pseudo-register or constant is returned.  */
5114
5115 rtx
5116 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5117 {
5118   rtx tem = 0;
5119
5120   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5121     tem = simplify_binary_operation (AND, mode, op0, op1);
5122   if (tem == 0)
5123     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5124
5125   if (target == 0)
5126     target = tem;
5127   else if (tem != target)
5128     emit_move_insn (target, tem);
5129   return target;
5130 }
5131
5132 /* Helper function for emit_store_flag.  */
5133 rtx
5134 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5135              machine_mode mode, machine_mode compare_mode,
5136              int unsignedp, rtx x, rtx y, int normalizep,
5137              machine_mode target_mode)
5138 {
5139   struct expand_operand ops[4];
5140   rtx op0, comparison, subtarget;
5141   rtx_insn *last;
5142   machine_mode result_mode = targetm.cstore_mode (icode);
5143
5144   last = get_last_insn ();
5145   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5146   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5147   if (!x || !y)
5148     {
5149       delete_insns_since (last);
5150       return NULL_RTX;
5151     }
5152
5153   if (target_mode == VOIDmode)
5154     target_mode = result_mode;
5155   if (!target)
5156     target = gen_reg_rtx (target_mode);
5157
5158   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5159
5160   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5161   create_fixed_operand (&ops[1], comparison);
5162   create_fixed_operand (&ops[2], x);
5163   create_fixed_operand (&ops[3], y);
5164   if (!maybe_expand_insn (icode, 4, ops))
5165     {
5166       delete_insns_since (last);
5167       return NULL_RTX;
5168     }
5169   subtarget = ops[0].value;
5170
5171   /* If we are converting to a wider mode, first convert to
5172      TARGET_MODE, then normalize.  This produces better combining
5173      opportunities on machines that have a SIGN_EXTRACT when we are
5174      testing a single bit.  This mostly benefits the 68k.
5175
5176      If STORE_FLAG_VALUE does not have the sign bit set when
5177      interpreted in MODE, we can do this conversion as unsigned, which
5178      is usually more efficient.  */
5179   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5180     {
5181       convert_move (target, subtarget,
5182                     val_signbit_known_clear_p (result_mode,
5183                                                STORE_FLAG_VALUE));
5184       op0 = target;
5185       result_mode = target_mode;
5186     }
5187   else
5188     op0 = subtarget;
5189
5190   /* If we want to keep subexpressions around, don't reuse our last
5191      target.  */
5192   if (optimize)
5193     subtarget = 0;
5194
5195   /* Now normalize to the proper value in MODE.  Sometimes we don't
5196      have to do anything.  */
5197   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5198     ;
5199   /* STORE_FLAG_VALUE might be the most negative number, so write
5200      the comparison this way to avoid a compiler-time warning.  */
5201   else if (- normalizep == STORE_FLAG_VALUE)
5202     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5203
5204   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5205      it hard to use a value of just the sign bit due to ANSI integer
5206      constant typing rules.  */
5207   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5208     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5209                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5210                         normalizep == 1);
5211   else
5212     {
5213       gcc_assert (STORE_FLAG_VALUE & 1);
5214
5215       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5216       if (normalizep == -1)
5217         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5218     }
5219
5220   /* If we were converting to a smaller mode, do the conversion now.  */
5221   if (target_mode != result_mode)
5222     {
5223       convert_move (target, op0, 0);
5224       return target;
5225     }
5226   else
5227     return op0;
5228 }
5229
5230
5231 /* A subroutine of emit_store_flag only including "tricks" that do not
5232    need a recursive call.  These are kept separate to avoid infinite
5233    loops.

        TARGET, CODE, OP0, OP1, MODE, UNSIGNEDP and NORMALIZEP are as for
        emit_store_flag.  TARGET_MODE is the mode wanted for the result;
        emit_store_flag passes GET_MODE (TARGET), or VOIDmode when TARGET
        is null.

        Return an rtx holding the flag value, or 0 if none of the
        strategies tried here produced one.

        Note that the double-word path below does call emit_store_flag,
        but only on word_mode operands.  */
5234
5235 static rtx
5236 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5237                    machine_mode mode, int unsignedp, int normalizep,
5238                    machine_mode target_mode)
5239 {
5240   rtx subtarget;
5241   enum insn_code icode;
5242   machine_mode compare_mode;
5243   enum mode_class mclass;
5244   enum rtx_code scode;
5245   rtx tem;
5246
5247   if (unsignedp)
5248     code = unsigned_condition (code);
5249   /* SCODE is set below, once the operands are in canonical order.  */
5250
5251   /* If one operand is constant, make it the second one.  Only do this
5252      if the other operand is not constant as well.  */
5253
5254   if (swap_commutative_operands_p (op0, op1))
5255     {
5256       tem = op0;
5257       op0 = op1;
5258       op1 = tem;
5259       code = swap_condition (code);
5260     }
       /* SCODE is the condition equivalent to CODE when OP0 and OP1 are
          exchanged; it is used for the floating-point retry at the end of
          this function.  It must be derived from CODE as it stands after
          the swap above: computing it before the swap would leave SCODE
          equal to CODE whenever the operands were exchanged, and the retry
          with (OP1, OP0) would then test the wrong comparison.  */
5261   scode = swap_condition (code);
5262   if (mode == VOIDmode)
5263     mode = GET_MODE (op0);
5264
5265   /* For some comparisons with 1 and -1, we can convert this to
5266      comparisons with zero.  This will often produce more opportunities for
5267      store-flag insns.  */
5268
5269   switch (code)
5270     {
5271     case LT:
5272       if (op1 == const1_rtx)
5273         op1 = const0_rtx, code = LE;
5274       break;
5275     case LE:
5276       if (op1 == constm1_rtx)
5277         op1 = const0_rtx, code = LT;
5278       break;
5279     case GE:
5280       if (op1 == const1_rtx)
5281         op1 = const0_rtx, code = GT;
5282       break;
5283     case GT:
5284       if (op1 == constm1_rtx)
5285         op1 = const0_rtx, code = GE;
5286       break;
5287     case GEU:
5288       if (op1 == const1_rtx)
5289         op1 = const0_rtx, code = NE;
5290       break;
5291     case LTU:
5292       if (op1 == const1_rtx)
5293         op1 = const0_rtx, code = EQ;
5294       break;
5295     default:
5296       break;
5297     }
5298
5299   /* If we are comparing a double-word integer with zero or -1, we can
5300      convert the comparison into one involving a single word.  */
5301   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5302       && GET_MODE_CLASS (mode) == MODE_INT
5303       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5304     {
5305       if ((code == EQ || code == NE)
5306           && (op1 == const0_rtx || op1 == constm1_rtx))
5307         {
5308           rtx op00, op01;
5309
5310           /* Do a logical OR or AND of the two words and compare the
5311              result.  */
5312           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5313           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5314           tem = expand_binop (word_mode,
5315                               op1 == const0_rtx ? ior_optab : and_optab,
5316                               op00, op01, NULL_RTX, unsignedp,
5317                               OPTAB_DIRECT);
5318
5319           if (tem != 0)
5320             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5321                                    unsignedp, normalizep);
5322         }
5323       else if ((code == LT || code == GE) && op1 == const0_rtx)
5324         {
5325           rtx op0h;
5326
5327           /* If testing the sign bit, can just test on high word.  */
5328           op0h = simplify_gen_subreg (word_mode, op0, mode,
5329                                       subreg_highpart_offset (word_mode,
5330                                                               mode));
5331           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5332                                  unsignedp, normalizep);
5333         }
5334       else
5335         tem = NULL_RTX;
5336
           /* TEM, if set, is the result of the single-word comparison.
              Return it as is when no particular result mode is wanted or
              it already has that mode; otherwise convert it into TARGET,
              allocating a pseudo if the caller supplied none.  The
              conversion flag is nonzero unless the true value
              (NORMALIZEP, or STORE_FLAG_VALUE when NORMALIZEP is 0) is
              known to have its sign bit set in word_mode.  */
5337       if (tem)
5338         {
5339           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5340             return tem;
5341           if (!target)
5342             target = gen_reg_rtx (target_mode);
5343
5344           convert_move (target, tem,
5345                         !val_signbit_known_set_p (word_mode,
5346                                                   (normalizep ? normalizep
5347                                                    : STORE_FLAG_VALUE)));
5348           return target;
5349         }
5350     }
5351
5352   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5353      complement of A (for GE) and shifting the sign bit to the low bit.  */
5354   if (op1 == const0_rtx && (code == LT || code == GE)
5355       && GET_MODE_CLASS (mode) == MODE_INT
5356       && (normalizep || STORE_FLAG_VALUE == 1
5357           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5358     {
5359       subtarget = target;
5360
5361       if (!target)
5362         target_mode = mode;
5363
5364       /* If the result is to be wider than OP0, it is best to convert it
5365          first.  If it is to be narrower, it is *incorrect* to convert it
5366          first.  */
5367       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5368         {
5369           op0 = convert_modes (target_mode, mode, op0, 0);
5370           mode = target_mode;
5371         }
5372
           /* TARGET can only be used as a scratch destination when the
              operations below are carried out in its mode.  */
5373       if (target_mode != mode)
5374         subtarget = 0;
5375
5376       if (code == GE)
5377         op0 = expand_unop (mode, one_cmpl_optab, op0,
5378                            ((STORE_FLAG_VALUE == 1 || normalizep)
5379                             ? 0 : subtarget), 0);
5380
5381       if (STORE_FLAG_VALUE == 1 || normalizep)
5382         /* If we are supposed to produce a 0/1 value, we want to do
5383            a logical shift from the sign bit to the low-order bit; for
5384            a -1/0 value, we do an arithmetic shift.  */
5385         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5386                             GET_MODE_BITSIZE (mode) - 1,
5387                             subtarget, normalizep != -1);
5388
5389       if (mode != target_mode)
5390         op0 = convert_modes (target_mode, mode, op0, 0);
5391
5392       return op0;
5393     }
5394
       /* Look for a cstore pattern, starting with MODE and moving to
          successively wider modes (CCmode is used for the lookup when MODE
          is of class MODE_CC).  Only the first mode that provides a
          pattern is tried: the loop is left after it whether or not the
          expansion succeeded.  */
5395   mclass = GET_MODE_CLASS (mode);
5396   for (compare_mode = mode; compare_mode != VOIDmode;
5397        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5398     {
5399      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5400      icode = optab_handler (cstore_optab, optab_mode);
5401      if (icode != CODE_FOR_nothing)
5402         {
5403           do_pending_stack_adjust ();
5404           tem = emit_cstore (target, icode, code, mode, compare_mode,
5405                              unsignedp, op0, op1, normalizep, target_mode);
5406           if (tem)
5407             return tem;
5408
               /* For floating point, retry with the operands exchanged and
                  the correspondingly swapped condition SCODE.  */
5409           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5410             {
5411               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5412                                  unsignedp, op1, op0, normalizep, target_mode);
5413               if (tem)
5414                 return tem;
5415             }
5416           break;
5417         }
5418     }
5419
5420   return 0;
5421 }
5422
5423 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5424    and storing in TARGET.  Normally return TARGET.
5425    Return 0 if that cannot be done.
5426
5427    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5428    it is VOIDmode, they cannot both be CONST_INT.
5429
5430    UNSIGNEDP is for the case where we have to widen the operands
5431    to perform the operation.  It says to use zero-extension.
5432
5433    NORMALIZEP is 1 if we should convert the result to be either zero
5434    or one.  Normalize is -1 if we should convert the result to be
5435    either zero or -1.  If NORMALIZEP is zero, the result will be left
5436    "raw" out of the scc insn.

        TARGET may be null; VOIDmode is then passed to emit_store_flag_1 as
        the wanted result mode.

        emit_store_flag_1 is tried first.  If it fails, a series of
        alternative expansions is attempted; several of them remove the
        insns they emitted with delete_insns_since when they have to be
        abandoned.  */
5437
5438 rtx
5439 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5440                  machine_mode mode, int unsignedp, int normalizep)
5441 {
5442   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5443   enum rtx_code rcode;
5444   rtx subtarget;
5445   rtx tem, trueval;
5446   rtx_insn *last;
5447
5448   /* If we compare constants, we shouldn't use a store-flag operation,
5449      but a constant load.  We can get there via the vanilla route that
5450      usually generates a compare-branch sequence, but will in this case
5451      fold the comparison to a constant, and thus elide the branch.  */
5452   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5453     return NULL_RTX;
5454
5455   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5456                            target_mode);
5457   if (tem)
5458     return tem;
5459
5460   /* If we reached here, we can't do this with a scc insn, however there
5461      are some comparisons that can be done in other ways.  Don't do any
5462      of these cases if branches are very cheap.  */
5463   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5464     return 0;
5465
5466   /* See what we need to return.  We can only return a 1, -1, or the
5467      sign bit.  */
5468
5469   if (normalizep == 0)
5470     {
5471       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5472         normalizep = STORE_FLAG_VALUE;
5473
5474       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5475         ;
5476       else
5477         return 0;
5478     }
5479
       /* Remember the current end of the insn stream so that a failed
          attempt below can be undone with delete_insns_since.  */
5480   last = get_last_insn ();
5481
5482   /* If optimizing, use different pseudo registers for each insn, instead
5483      of reusing the same pseudo.  This leads to better CSE, but slows
5484      down the compiler, since there are more pseudos.  */
5485   subtarget = (!optimize
5486                && (target_mode == mode)) ? target : NULL_RTX;
       /* TRUEVAL is the constant the result has when the comparison holds:
          NORMALIZEP if it is nonzero, otherwise STORE_FLAG_VALUE.  */
5487   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5488
5489   /* For floating-point comparisons, try the reverse comparison or try
5490      changing the "orderedness" of the comparison.  */
5491   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5492     {
5493       enum rtx_code first_code;
5494       bool and_them;
5495
5496       rcode = reverse_condition_maybe_unordered (code);
5497       if (can_compare_p (rcode, mode, ccp_store_flag)
5498           && (code == ORDERED || code == UNORDERED
5499               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5500               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5501         {
               /* WANT_ADD is set when STORE_FLAG_VALUE and NORMALIZEP are 1
                  and -1 in either order.  The reversed comparison is then
                  emitted with STORE_FLAG_VALUE as its true value, giving
                  STORE_FLAG_VALUE or 0, and adding NORMALIZEP maps those to
                  0 and NORMALIZEP respectively.  Otherwise the reversed
                  comparison is emitted with the wanted normalization and
                  XORing with TRUEVAL exchanges TRUEVAL and 0.  Either form
                  is used only if rtx_cost reports the constant as free.  */
5502           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5503                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5504
5505           /* For the reverse comparison, use either an addition or a XOR.  */
5506           if (want_add
5507               && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5508                            optimize_insn_for_speed_p ()) == 0)
5509             {
5510               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5511                                        STORE_FLAG_VALUE, target_mode);
5512               if (tem)
5513                 return expand_binop (target_mode, add_optab, tem,
5514                                      gen_int_mode (normalizep, target_mode),
5515                                      target, 0, OPTAB_WIDEN);
5516             }
5517           else if (!want_add
5518                    && rtx_cost (trueval, XOR, 1,
5519                                 optimize_insn_for_speed_p ()) == 0)
5520             {
5521               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5522                                        normalizep, target_mode);
5523               if (tem)
5524                 return expand_binop (target_mode, xor_optab, tem, trueval,
5525                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5526             }
5527         }
5528
5529       delete_insns_since (last);
5530
5531       /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
5532       if (code == ORDERED || code == UNORDERED)
5533         return 0;
5534
           /* Split CODE into FIRST_CODE and a second comparison, which is
              stored back into CODE.  */
5535       and_them = split_comparison (code, mode, &first_code, &code);
5536
5537       /* If there are no NaNs, the first comparison should always fall through.
5538          Effectively change the comparison to the other one.  */
5539       if (!HONOR_NANS (mode))
5540         {
5541           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5542           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5543                                     target_mode);
5544         }
5545
5546 #ifdef HAVE_conditional_move
5547       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5548          conditional move.  */
5549       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5550                                normalizep, target_mode);
5551       if (tem == 0)
5552         return 0;
5553
           /* Presumably the conditional move selects its first value operand
              when CODE holds and the second otherwise (confirm against
              emit_conditional_move): with AND_THEM the result is then TEM
              or 0, without it TRUEVAL or TEM.  */
5554       if (and_them)
5555         tem = emit_conditional_move (target, code, op0, op1, mode,
5556                                      tem, const0_rtx, GET_MODE (tem), 0);
5557       else
5558         tem = emit_conditional_move (target, code, op0, op1, mode,
5559                                      trueval, tem, GET_MODE (tem), 0);
5560
5561       if (tem == 0)
5562         delete_insns_since (last);
5563       return tem;
5564 #else
5565       return 0;
5566 #endif
5567     }
5568
5569   /* The remaining tricks only apply to integer comparisons.  */
5570
5571   if (GET_MODE_CLASS (mode) != MODE_INT)
5572     return 0;
5573
5574   /* If this is an equality comparison of integers, we can try to exclusive-or
5575      (or subtract) the two operands and use a recursive call to try the
5576      comparison with zero.  Don't do any of these cases if branches are
5577      very cheap.  */
5578
5579   if ((code == EQ || code == NE) && op1 != const0_rtx)
5580     {
5581       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5582                           OPTAB_WIDEN);
5583
5584       if (tem == 0)
5585         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5586                             OPTAB_WIDEN);
5587       if (tem != 0)
5588         tem = emit_store_flag (target, code, tem, const0_rtx,
5589                                mode, unsignedp, normalizep);
5590       if (tem != 0)
5591         return tem;
5592
5593       delete_insns_since (last);
5594     }
5595
5596   /* For integer comparisons, try the reverse comparison.  However, for
5597      small X and if we'd have anyway to extend, implementing "X != 0"
5598      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5599   rcode = reverse_condition (code);
5600   if (can_compare_p (rcode, mode, ccp_store_flag)
5601       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5602             && code == NE
5603             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5604             && op1 == const0_rtx))
5605     {
5606       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5607                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5608
5609       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5610       if (want_add
5611           && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5612                        optimize_insn_for_speed_p ()) == 0)
5613         {
5614           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5615                                    STORE_FLAG_VALUE, target_mode);
5616           if (tem != 0)
5617             tem = expand_binop (target_mode, add_optab, tem,
5618                                 gen_int_mode (normalizep, target_mode),
5619                                 target, 0, OPTAB_WIDEN);
5620         }
5621       else if (!want_add
5622                && rtx_cost (trueval, XOR, 1,
5623                             optimize_insn_for_speed_p ()) == 0)
5624         {
5625           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5626                                    normalizep, target_mode);
5627           if (tem != 0)
5628             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5629                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5630         }
5631
5632       if (tem != 0)
5633         return tem;
5634       delete_insns_since (last);
5635     }
5636
5637   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5638      the constant zero.  Reject all other comparisons at this point.  Only
5639      do LE and GT if branches are expensive since they are expensive on
5640      2-operand machines.  */
5641
5642   if (op1 != const0_rtx
5643       || (code != EQ && code != NE
5644           && (BRANCH_COST (optimize_insn_for_speed_p (),
5645                            false) <= 1 || (code != LE && code != GT))))
5646     return 0;
5647
5648   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5649      do the necessary operation below.  */
5650
5651   tem = 0;
5652
5653   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5654      the sign bit set.  */
5655
5656   if (code == LE)
5657     {
5658       /* This is destructive, so SUBTARGET can't be OP0.  */
5659       if (rtx_equal_p (subtarget, op0))
5660         subtarget = 0;
5661
5662       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5663                           OPTAB_WIDEN);
5664       if (tem)
5665         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5666                             OPTAB_WIDEN);
5667     }
5668
5669   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5670      number of bits in the mode of OP0, minus one.  */
5671
5672   if (code == GT)
5673     {
5674       if (rtx_equal_p (subtarget, op0))
5675         subtarget = 0;
5676
5677       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5678                           GET_MODE_BITSIZE (mode) - 1,
5679                           subtarget, 0);
5680       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5681                           OPTAB_WIDEN);
5682     }
5683
5684   if (code == EQ || code == NE)
5685     {
5686       /* For EQ or NE, one way to do the comparison is to apply an operation
5687          that converts the operand into a positive number if it is nonzero
5688          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5689          for NE we negate.  This puts the result in the sign bit.  Then we
5690          normalize with a shift, if needed.
5691
5692          Two operations that can do the above actions are ABS and FFS, so try
5693          them.  If that doesn't work, and MODE is smaller than a full word,
5694          we can use zero-extension to the wider mode (an unsigned conversion)
5695          as the operation.  */
5696
5697       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5698          that is compensated by the subsequent overflow when subtracting
5699          one / negating.  */
5700
5701       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5702         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5703       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5704         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5705       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5706         {
5707           tem = convert_modes (word_mode, mode, op0, 1);
5708           mode = word_mode;
5709         }
5710
5711       if (tem != 0)
5712         {
5713           if (code == EQ)
5714             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5715                                 0, OPTAB_WIDEN);
5716           else
5717             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5718         }
5719
5720       /* If we couldn't do it that way, for NE we can "or" the two's complement
5721          of the value with itself.  For EQ, we take the one's complement of
5722          that "or", which is an extra insn, so we only handle EQ if branches
5723          are expensive.  */
5724
5725       if (tem == 0
5726           && (code == NE
5727               || BRANCH_COST (optimize_insn_for_speed_p (),
5728                               false) > 1))
5729         {
5730           if (rtx_equal_p (subtarget, op0))
5731             subtarget = 0;
5732
               /* NOTE(review): the result of the negation is passed to
                  expand_binop without a null check -- confirm that
                  expand_unop cannot return 0 here.  */
5733           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5734           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5735                               OPTAB_WIDEN);
5736
5737           if (tem && code == EQ)
5738             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5739         }
5740     }
5741
       /* TEM now carries the result in its sign bit.  When a normalized
          value is wanted, shift the sign bit down to the low-order bit; the
          last argument is nonzero when NORMALIZEP is 1 and zero when it is
          -1.  */
5742   if (tem && normalizep)
5743     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5744                         GET_MODE_BITSIZE (mode) - 1,
5745                         subtarget, normalizep == 1);
5746
       /* If the caller supplied TARGET, get the value into it: convert when
          the modes differ, or copy when no SUBTARGET was in use for the
          operations above.  On failure, remove everything emitted since
          LAST.  */
5747   if (tem)
5748     {
5749       if (!target)
5750         ;
5751       else if (GET_MODE (tem) != target_mode)
5752         {
5753           convert_move (target, tem, 0);
5754           tem = target;
5755         }
5756       else if (!subtarget)
5757         {
5758           emit_move_insn (target, tem);
5759           tem = target;
5760         }
5761     }
5762   else
5763     delete_insns_since (last);
5764
5765   return tem;
5766 }
5767
5768 /* Like emit_store_flag, but always succeeds.

        The arguments are those of emit_store_flag and are first passed to
        it unchanged.  If it returns 0, the value is produced with an
        explicit compare-and-jump sequence instead: the result register is
        loaded with one value, a conditional jump skips over a second store,
        and the second store supplies the other value.

        In that fallback the "true" value is NORMALIZEP if it is nonzero
        and 1 otherwise, and the "false" value is 0.  If TARGET is null a
        new word_mode pseudo is used.  The rtx returned is the register
        holding the result; it can be a fresh pseudo rather than TARGET when
        TARGET is not a register or is mentioned in OP0 or OP1.  */
5769
5770 rtx
5771 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5772                        machine_mode mode, int unsignedp, int normalizep)
5773 {
5774   rtx tem;
5775   rtx_code_label *label;
5776   rtx trueval, falseval;
5777
5778   /* First see if emit_store_flag can do the job.  */
5779   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5780   if (tem != 0)
5781     return tem;
5782
5783   if (!target)
5784     target = gen_reg_rtx (word_mode);
5785
5786   /* If this failed, we have to do this with set/compare/jump/set code.
5787      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5788   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5789   if (code == NE
5790       && GET_MODE_CLASS (mode) == MODE_INT
5791       && REG_P (target)
5792       && op0 == target
5793       && op1 == const0_rtx)
5794     {
           /* TARGET already holds OP0.  Jump over the store when it is zero,
              which is then already the right answer; otherwise overwrite it
              with TRUEVAL.  */
5795       label = gen_label_rtx ();
5796       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5797                                mode, NULL_RTX, NULL_RTX, label, -1);
5798       emit_move_insn (target, trueval);
5799       emit_label (label);
5800       return target;
5801     }
5802
       /* TARGET is written before the comparison is emitted below, so it
          must be a register that neither operand mentions; otherwise use a
          new pseudo of the same mode.  */
5803   if (!REG_P (target)
5804       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5805     target = gen_reg_rtx (GET_MODE (target));
5806
5807   /* Jump in the right direction if the target cannot implement CODE
5808      but can jump on its reverse condition.  */
5809   falseval = const0_rtx;
5810   if (! can_compare_p (code, mode, ccp_jump)
5811       && (! FLOAT_MODE_P (mode)
5812           || code == ORDERED || code == UNORDERED
5813           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5814           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5815     {
5816       enum rtx_code rcode;
5817       if (FLOAT_MODE_P (mode))
5818         rcode = reverse_condition_maybe_unordered (code);
5819       else
5820         rcode = reverse_condition (code);
5821
5822       /* Canonicalize to UNORDERED for the libcall.  */
5823       if (can_compare_p (rcode, mode, ccp_jump)
5824           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5825         {
               /* Test the reversed condition instead and exchange the two
                  values being stored, so the final result is unchanged.  */
5826           falseval = trueval;
5827           trueval = const0_rtx;
5828           code = rcode;
5829         }
5830     }
5831
       /* Load TRUEVAL, jump to LABEL if the comparison holds, and otherwise
          fall through to the store of FALSEVAL.  */
5832   emit_move_insn (target, trueval);
5833   label = gen_label_rtx ();
5834   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5835                            NULL_RTX, label, -1);
5836
5837   emit_move_insn (target, falseval);
5838   emit_label (label);
5839
5840   return target;
5841 }
5842 \f
5843 /* Perform possibly multi-word comparison and conditional jump to LABEL
5844    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5845    now a thin wrapper around do_compare_rtx_and_jump.

        The signedness passed on is derived from OP itself: the comparison
        is treated as unsigned exactly when OP is LTU, LEU, GTU or GEU.  */
5846
5847 static void
5848 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5849                  rtx_code_label *label)
5850 {
5851   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
       /* LABEL is passed as the last label argument; the two NULL_RTX
          arguments and the trailing -1 are presumably "no size", "no
          false label" and "no branch probability" -- confirm against
          do_compare_rtx_and_jump.  */
5852   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5853                            NULL_RTX, NULL_RTX, label, -1);
5854 }