gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2015 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "diagnostic-core.h"
  27 #include "rtl.h"
  28 #include "hash-set.h"
  29 #include "machmode.h"
  30 #include "vec.h"
  31 #include "double-int.h"
  32 #include "input.h"
  33 #include "alias.h"
  34 #include "symtab.h"
  35 #include "wide-int.h"
  36 #include "inchash.h"
  37 #include "tree.h"
  38 #include "fold-const.h"
  39 #include "stor-layout.h"
  40 #include "tm_p.h"
  41 #include "flags.h"
  42 #include "insn-config.h"
  43 #include "hashtab.h"
  44 #include "hard-reg-set.h"
  45 #include "function.h"
  46 #include "statistics.h"
  47 #include "real.h"
  48 #include "fixed-value.h"
  49 #include "expmed.h"
  50 #include "dojump.h"
  51 #include "explow.h"
  52 #include "calls.h"
  53 #include "emit-rtl.h"
  54 #include "varasm.h"
  55 #include "stmt.h"
  56 #include "expr.h"
  57 #include "insn-codes.h"
  58 #include "optabs.h"
  59 #include "recog.h"
  60 #include "langhooks.h"
  61 #include "predict.h"
  62 #include "basic-block.h"
  63 #include "df.h"
  64 #include "target.h"
  65
     /* The default instance of the per-target expmed state.  When
        SWITCHABLE_TARGET is nonzero, this_target_expmed is additionally
        defined as a pointer that initially refers to this instance.  */
  66 struct target_expmed default_target_expmed;
  67 #if SWITCHABLE_TARGET
  68 struct target_expmed *this_target_expmed = &default_target_expmed;
  69 #endif
  70
     /* Prototypes for static (file-local) helper functions.  */
  71 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  72                                    unsigned HOST_WIDE_INT,
  73                                    unsigned HOST_WIDE_INT,
  74                                    unsigned HOST_WIDE_INT,
  75                                    rtx);
  76 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
  77                                      unsigned HOST_WIDE_INT,
  78                                      rtx);
  79 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  80                                    unsigned HOST_WIDE_INT,
  81                                    unsigned HOST_WIDE_INT,
  82                                    unsigned HOST_WIDE_INT,
  83                                    rtx);
  84 static rtx extract_fixed_bit_field (machine_mode, rtx,
  85                                     unsigned HOST_WIDE_INT,
  86                                     unsigned HOST_WIDE_INT, rtx, int);
  87 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
  88                                       unsigned HOST_WIDE_INT,
  89                                       unsigned HOST_WIDE_INT, rtx, int);
  90 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
  91 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  92                                     unsigned HOST_WIDE_INT, int);
  93 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
  94 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  95 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  96
  97 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  98    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  99    The mask is truncated if necessary to the width of mode MODE.  The
 100    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.
        Note that BITPOS comes before BITSIZE in the argument list.  */
 101
 102 static inline rtx
 103 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
 104 {
       /* wi::shifted_mask is asked for a value of MODE's precision;
          the result is then passed to immed_wide_int_const together
          with MODE to obtain the rtx that is returned.  */
 105   return immed_wide_int_const
 106     (wi::shifted_mask (bitpos, bitsize, complement,
 107                        GET_MODE_PRECISION (mode)), mode);
 108 }
 109
 110 /* Test whether a value is zero or a power of two.  The argument is
        evaluated twice, so it must not have side effects.  */
 111 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
 112   (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
 113
     /* Scratch rtx expressions used while initializing the expmed cost
        tables.  init_expmed builds each of them once in QImode;
        init_expmed_one_mode then changes their modes in place (PUT_MODE)
        and patches some operands before asking set_src_cost for a cost.
        The per-field notes below describe the shape given to each
        expression by init_expmed.  */
 114 struct init_expmed_rtl
 115 {
 116   rtx reg;          /* Register operand shared by all expressions.  */
 117   rtx plus;         /* (plus reg reg)  */
 118   rtx neg;          /* (neg reg)  */
 119   rtx mult;         /* (mult reg reg)  */
 120   rtx sdiv;         /* (div reg reg)  */
 121   rtx udiv;         /* (udiv reg reg)  */
 122   rtx sdiv_32;      /* (div reg pow2[5])  */
 123   rtx smod_32;      /* (mod reg pow2[5])  */
 124   rtx wide_mult;    /* (mult zext zext)  */
 125   rtx wide_lshr;    /* (lshiftrt wide_mult <count>)  */
 126   rtx wide_trunc;   /* (truncate wide_lshr)  */
 127   rtx shift;        /* (ashift reg <count>)  */
 128   rtx shift_mult;   /* (mult reg <power of two>)  */
 129   rtx shift_add;    /* (plus shift_mult reg)  */
 130   rtx shift_sub0;   /* (minus shift_mult reg)  */
 131   rtx shift_sub1;   /* (minus reg shift_mult)  */
 132   rtx zext;         /* (zero_extend reg)  */
 133   rtx trunc;        /* (truncate reg)  */
 134
       /* pow2[m] is the constant 1 << m and cint[m] the constant m, for
          1 <= m < MAX_BITS_PER_WORD.  init_expmed leaves entry 0 of both
          arrays null.  */
 135   rtx pow2[MAX_BITS_PER_WORD];
 136   rtx cint[MAX_BITS_PER_WORD];
 137 };
 138
     /* Record the cost of converting a value from FROM_MODE to TO_MODE.
        SPEED is forwarded to set_src_cost and set_convert_cost and so
        selects which cost table is filled.  The cost is that of
        ALL->trunc when TO_MODE has the smaller precision (after the
        partial-integer adjustment below) and that of ALL->zext
        otherwise.  ALL->reg is switched to FROM_MODE and is left in that
        mode on return; the modes of ALL->trunc and ALL->zext are
        whatever the caller set them to.  */
 139 static void
 140 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
 141                       machine_mode from_mode, bool speed)
 142 {
 143   int to_size, from_size;
 144   rtx which;
 145
 146   to_size = GET_MODE_PRECISION (to_mode);
 147   from_size = GET_MODE_PRECISION (from_mode);
 148
 149   /* Most partial integers have a precision less than the "full"
 150      integer it requires for storage.  In case one doesn't, for
 151      comparison purposes here, reduce the bit size by one in that
 152      case.  */
 153   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 154       && exact_log2 (to_size) != -1)
 155     to_size --;
 156   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 157       && exact_log2 (from_size) != -1)
 158     from_size --;
 159
 160   /* Assume cost of zero-extend and sign-extend is the same.  */
 161   which = (to_size < from_size ? all->trunc : all->zext);
 162
       /* init_expmed builds both candidates with ALL->reg as their
          operand, so changing the register's mode changes the source
          mode of the conversion being costed.  */
 163   PUT_MODE (all->reg, from_mode);
 164   set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
 165 }
 166
     /* Fill in the cost tables selected by SPEED for mode MODE, using the
        scratch expressions in ALL.

        Every scratch expression except ALL->wide_mult and ALL->wide_lshr
        is first switched to MODE.  The function then records, in order:
        the add, neg, mult, sdiv and udiv costs; whether signed division
        and modulus by 32 are "cheap"; the shift, shift-and-add and
        shift-and-subtract costs for each shift count below
        MIN (MAX_BITS_PER_WORD, unit bit size of MODE); for scalar integer
        modes, the cost of converting from every mode in
        MIN_MODE_INT..MAX_MODE_INT; and, for MODE_INT modes that have a
        wider mode, the widening and highpart multiply costs.  */
 167 static void
 168 init_expmed_one_mode (struct init_expmed_rtl *all,
 169                       machine_mode mode, int speed)
 170 {
 171   int m, n, mode_bitsize;
 172   machine_mode mode_from;
 173
 174   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 175
 176   PUT_MODE (all->reg, mode);
 177   PUT_MODE (all->plus, mode);
 178   PUT_MODE (all->neg, mode);
 179   PUT_MODE (all->mult, mode);
 180   PUT_MODE (all->sdiv, mode);
 181   PUT_MODE (all->udiv, mode);
 182   PUT_MODE (all->sdiv_32, mode);
 183   PUT_MODE (all->smod_32, mode);
 184   PUT_MODE (all->wide_trunc, mode);
 185   PUT_MODE (all->shift, mode);
 186   PUT_MODE (all->shift_mult, mode);
 187   PUT_MODE (all->shift_add, mode);
 188   PUT_MODE (all->shift_sub0, mode);
 189   PUT_MODE (all->shift_sub1, mode);
 190   PUT_MODE (all->zext, mode);
 191   PUT_MODE (all->trunc, mode);
 192
 193   set_add_cost (speed, mode, set_src_cost (all->plus, speed));
 194   set_neg_cost (speed, mode, set_src_cost (all->neg, speed));
 195   set_mul_cost (speed, mode, set_src_cost (all->mult, speed));
 196   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, speed));
 197   set_udiv_cost (speed, mode, set_src_cost (all->udiv, speed));
 198
       /* Signed division (modulus) by 32 counts as cheap when it costs
          no more than two (four) additions.  */
 199   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, speed)
 200                                      <= 2 * add_cost (speed, mode)));
 201   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, speed)
 202                                      <= 4 * add_cost (speed, mode)));
 203
       /* A shift by zero is recorded as free; a shift-and-add/sub with a
          zero shift count is recorded as costing one addition.  */
 204   set_shift_cost (speed, mode, 0, 0);
 205   {
 206     int cost = add_cost (speed, mode);
 207     set_shiftadd_cost (speed, mode, 0, cost);
 208     set_shiftsub0_cost (speed, mode, 0, cost);
 209     set_shiftsub1_cost (speed, mode, 0, cost);
 210   }
 211
 212   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 213   for (m = 1; m < n; m++)
 214     {
           /* Patch the shift count and the power-of-two multiplier in
              place; ALL->shift_mult is an operand of the shift_add and
              shift_sub expressions, so they pick up the new multiplier.  */
 215       XEXP (all->shift, 1) = all->cint[m];
 216       XEXP (all->shift_mult, 1) = all->pow2[m];
 217
 218       set_shift_cost (speed, mode, m, set_src_cost (all->shift, speed));
 219       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, speed));
 220       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, speed));
 221       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, speed));
 222     }
 223
 224   if (SCALAR_INT_MODE_P (mode))
 225     {
           /* Each call below changes the mode of ALL->reg to MODE_FROM
              and does not restore it.  */
 226       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 227            mode_from = (machine_mode)(mode_from + 1))
 228         init_expmed_one_conv (all, mode, mode_from, speed);
 229     }
 230   if (GET_MODE_CLASS (mode) == MODE_INT)
 231     {
 232       machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
 233       if (wider_mode != VOIDmode)
 234         {
               /* The conversion loop above may have left ALL->reg in a
                  mode other than MODE.  Put it back, so that the
                  zero_extend operands of the widening multiply extend
                  from MODE as intended.  */
               PUT_MODE (all->reg, mode);
 235           PUT_MODE (all->zext, wider_mode);
 236           PUT_MODE (all->wide_mult, wider_mode);
 237           PUT_MODE (all->wide_lshr, wider_mode);
 238           XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
 239
 240           set_mul_widen_cost (speed, wider_mode,
 241                               set_src_cost (all->wide_mult, speed));
 242           set_mul_highpart_cost (speed, mode,
 243                                  set_src_cost (all->wide_trunc, speed));
 244         }
 245     }
 246 }
 247
     /* Initialize the expmed cost tables.

        Builds one scratch rtx per costed operation, all in QImode and all
        based on a register numbered LAST_VIRTUAL_REGISTER + 1.  It then
        runs init_expmed_one_mode for both values of SPEED over the modes
        MIN_MODE_INT..MAX_MODE_INT and, where those classes exist, over
        the partial-integer and vector-integer mode ranges.  Afterwards
        the alg hash table is cleared if it was already in use, and marked
        as in use otherwise.  Finally default_rtl_profile is called and
        the scratch expressions are freed.  */
 248 void
 249 init_expmed (void)
 250 {
 251   struct init_expmed_rtl all;
 252   machine_mode mode = QImode;
 253   int m, speed;
 254
 255   memset (&all, 0, sizeof all);
       /* Entry 0 of both arrays keeps the null value from the memset.  */
 256   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 257     {
 258       all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 259       all.cint[m] = GEN_INT (m);
 260     }
 261
 262   /* Avoid using hard regs in ways which may be unsupported.  */
 263   all.reg = gen_rtx_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 264   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 265   all.neg = gen_rtx_NEG (mode, all.reg);
 266   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 267   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 268   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 269   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 270   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 271   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 272   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 273   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 274   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 275   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 276   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 277   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 278   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 279   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 280   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 281
 282   for (speed = 0; speed < 2; speed++)
 283     {
           /* NOTE(review): presumably this makes cost queries that
              consult the current profile agree with SPEED -- confirm.  */
 284       crtl->maybe_hot_insn_p = speed;
 285       set_zero_cost (speed, set_src_cost (const0_rtx, speed));
 286
 287       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 288            mode = (machine_mode)(mode + 1))
 289         init_expmed_one_mode (&all, mode, speed);
 290
           /* A MIN_MODE_* value of VOIDmode is taken to mean that the
              mode class is empty, in which case the loop is skipped.  */
 291       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 292         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 293              mode = (machine_mode)(mode + 1))
 294           init_expmed_one_mode (&all, mode, speed);
 295
 296       if (MIN_MODE_VECTOR_INT != VOIDmode)
 297         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 298              mode = (machine_mode)(mode + 1))
 299           init_expmed_one_mode (&all, mode, speed);
 300     }
 301
       /* If the alg hash table has been used before, wipe all of its
          NUM_ALG_HASH_ENTRIES entries; otherwise just flag it as used.  */
 302   if (alg_hash_used_p ())
 303     {
 304       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 305       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 306     }
 307   else
 308     set_alg_hash_used_p (true);
       /* NOTE(review): presumably undoes the maybe_hot_insn_p setting
          made in the loop above -- confirm.  */
 309   default_rtl_profile ();
 310
       /* Free the scratch expressions in the reverse of the order in
          which they were created, so ALL.reg goes last.  The pow2 and
          cint constants are not freed here.  */
 311   ggc_free (all.trunc);
 312   ggc_free (all.shift_sub1);
 313   ggc_free (all.shift_sub0);
 314   ggc_free (all.shift_add);
 315   ggc_free (all.shift_mult);
 316   ggc_free (all.shift);
 317   ggc_free (all.wide_trunc);
 318   ggc_free (all.wide_lshr);
 319   ggc_free (all.wide_mult);
 320   ggc_free (all.zext);
 321   ggc_free (all.smod_32);
 322   ggc_free (all.sdiv_32);
 323   ggc_free (all.udiv);
 324   ggc_free (all.sdiv);
 325   ggc_free (all.mult);
 326   ggc_free (all.neg);
 327   ggc_free (all.plus);
 328   ggc_free (all.reg);
 329 }
 330
 331 /* Return an rtx representing minus the value of X.
 332    MODE is the intended mode of the result,
 333    useful if X is a CONST_INT.
        The negation is first attempted with simplify_unary_operation;
        only when that yields a null result is expand_unop called with
        neg_optab.  */
 334
 335 rtx
 336 negate_rtx (machine_mode mode, rtx x)
 337 {
 338   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 339
       /* A null result means no simplification was produced.  */
 340   if (result == 0)
 341     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 342
 343   return result;
 344 }
 345
 346 /* Adjust bitfield memory MEM so that it points to the first unit of mode
 347    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
 348    If MODE is BLKmode, return a reference to every byte in the bitfield.
 349    Set *NEW_BITNUM to the bit position of the field within the new memory.  */
 350
 351 static rtx
 352 narrow_bit_field_mem (rtx mem, machine_mode mode,
 353                       unsigned HOST_WIDE_INT bitsize,
 354                       unsigned HOST_WIDE_INT bitnum,
 355                       unsigned HOST_WIDE_INT *new_bitnum)
 356 {
 357   if (mode == BLKmode)
 358     {
           /* Byte granularity: keep only the sub-byte part of BITNUM,
              start at the byte that contains it, and make the size
              cover the field rounded up to whole bytes.  */
 359       *new_bitnum = bitnum % BITS_PER_UNIT;
 360       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 361       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 362                             / BITS_PER_UNIT);
 363       return adjust_bitfield_address_size (mem, mode, offset, size);
 364     }
 365   else
 366     {
           /* Round BITNUM down to a multiple of MODE's bit size to find
              the byte offset; the remainder is the new bit position.
              BITSIZE is not used in this case.  */
 367       unsigned int unit = GET_MODE_BITSIZE (mode);
 368       *new_bitnum = bitnum % unit;
 369       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 370       return adjust_bitfield_address (mem, mode, offset);
 371     }
 372 }
 373
 374 /* The caller wants to perform insertion or extraction PATTERN on a
 375    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 376    BITREGION_START and BITREGION_END are as for store_bit_field
 377    and FIELDMODE is the natural mode of the field.
 378
 379    Search for a mode that is compatible with the memory access
 380    restrictions and (where applicable) with a register insertion or
 381    extraction.  Return the new memory on success, storing the adjusted
 382    bit position in *NEW_BITNUM.  Return null otherwise.  */
 383
 384 static rtx
 385 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 386                               rtx op0, HOST_WIDE_INT bitsize,
 387                               HOST_WIDE_INT bitnum,
 388                               unsigned HOST_WIDE_INT bitregion_start,
 389                               unsigned HOST_WIDE_INT bitregion_end,
 390                               machine_mode fieldmode,
 391                               unsigned HOST_WIDE_INT *new_bitnum)
 392 {
       /* The iterator is given the field, the bit region and OP0's
          alignment and volatility, and hands back candidate modes one
          at a time.  */
 393   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 394                                 bitregion_end, MEM_ALIGN (op0),
 395                                 MEM_VOLATILE_P (op0));
 396   machine_mode best_mode;
 397   if (iter.next_mode (&best_mode))
 398     {
 399       /* We can use a memory in BEST_MODE.  See whether this is true for
 400          any wider modes.  All other things being equal, we prefer to
 401          use the widest mode possible because it tends to expose more
 402          CSE opportunities.  */
 403       if (!iter.prefer_smaller_modes ())
 404         {
 405           /* Limit the search to the mode required by the corresponding
 406              register insertion or extraction instruction, if any.  */
               /* word_mode is the limit when no such instruction is
                  found.  */
 407           machine_mode limit_mode = word_mode;
 408           extraction_insn insn;
 409           if (get_best_reg_extraction_insn (&insn, pattern,
 410                                             GET_MODE_BITSIZE (best_mode),
 411                                             fieldmode))
 412             limit_mode = insn.field_mode;
 413
               /* Keep accepting the iterator's next mode for as long as
                  it is no larger than LIMIT_MODE; stop at the first mode
                  that is larger or when the iterator runs out.  */
 414           machine_mode wider_mode;
 415           while (iter.next_mode (&wider_mode)
 416                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 417             best_mode = wider_mode;
 418         }
 419       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 420                                    new_bitnum);
 421     }
       /* The iterator produced no usable mode at all.  */
 422   return NULL_RTX;
 423 }
 424
 425 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 426    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 427    offset is then BITNUM / BITS_PER_UNIT.
        Note that BITNUM is the first argument and BITSIZE the second.  */
 428
 429 static bool
 430 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
 431                      unsigned HOST_WIDE_INT bitsize,
 432                      machine_mode struct_mode)
 433 {
       /* With big-endian bytes the field must start on a byte boundary
          and end either at the end of STRUCT_MODE or on a word boundary.
          Otherwise it only has to start on a word boundary, and BITSIZE
          and STRUCT_MODE are not examined.  */
 434   if (BYTES_BIG_ENDIAN)
 435     return (bitnum % BITS_PER_UNIT == 0
 436             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
 437                 || (bitnum + bitsize) % BITS_PER_WORD == 0));
 438   else
 439     return bitnum % BITS_PER_WORD == 0;
 440 }
 441
 442 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 443    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 444    Return false if the access would touch memory outside the range
 445    BITREGION_START to BITREGION_END for conformance to the C++ memory
 446    model.  A BITREGION_END of zero disables that range check.  */
 447
 448 static bool
 449 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 450                             unsigned HOST_WIDE_INT bitnum,
 451                             machine_mode fieldmode,
 452                             unsigned HOST_WIDE_INT bitregion_start,
 453                             unsigned HOST_WIDE_INT bitregion_end)
 454 {
 455   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 456
 457   /* -fstrict-volatile-bitfields must be enabled and we must have a
 458      volatile MEM.  */
 459   if (!MEM_P (op0)
 460       || !MEM_VOLATILE_P (op0)
 461       || flag_strict_volatile_bitfields <= 0)
 462     return false;
 463
 464   /* Non-integral modes likely only happen with packed structures.
 465      Punt.  */
 466   if (!SCALAR_INT_MODE_P (fieldmode))
 467     return false;
 468
 469   /* The bit size must not be larger than the field mode, and
 470      the field mode must not be larger than a word.  */
 471   if (bitsize > modesize || modesize > BITS_PER_WORD)
 472     return false;
 473
 474   /* Check for cases of unaligned fields that must be split.  */
       /* The field, measured from the start of its byte (or, under
          STRICT_ALIGNMENT, from the preceding FIELDMODE alignment
          boundary), must not extend past MODESIZE bits.  */
 475   if (bitnum % BITS_PER_UNIT + bitsize > modesize
 476       || (STRICT_ALIGNMENT
 477           && bitnum % GET_MODE_ALIGNMENT (fieldmode) + bitsize > modesize))
 478     return false;
 479
 480   /* Check for cases where the C++ memory model applies.  */
       /* bitnum - bitnum % modesize is BITNUM rounded down to a multiple
          of MODESIZE.  The MODESIZE bits starting there must lie within
          [BITREGION_START, BITREGION_END].  */
 481   if (bitregion_end != 0
 482       && (bitnum - bitnum % modesize < bitregion_start
 483           || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
 484     return false;
 485
 486   return true;
 487 }
 488
 489 /* Return true if OP0 is a memory and if a bitfield of size BITSIZE at
 490    bit number BITNUM can be treated as a simple value of mode MODE.
        That requires the field to start on a byte boundary and to be
        exactly as wide as MODE.  In addition, either
        SLOW_UNALIGNED_ACCESS must be false for MODE at OP0's alignment,
        or both BITNUM and OP0's alignment must satisfy MODE's
        alignment.  */
 491
 492 static bool
 493 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 494                        unsigned HOST_WIDE_INT bitnum, machine_mode mode)
 495 {
 496   return (MEM_P (op0)
 497           && bitnum % BITS_PER_UNIT == 0
 498           && bitsize == GET_MODE_BITSIZE (mode)
 499           && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
 500               || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
 501                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 502 }
 503 \f
 504 /* Try to use instruction INSV to store VALUE into a field of OP0.
 505    BITSIZE and BITNUM are as for store_bit_field.
        Return true if the insertion was expanded.  Return false if
        BITSIZE is zero or wider than INSV's field mode, or if the insn
        could not be expanded; in the latter case every insn emitted
        since entry is deleted first.  */
 506
 507 static bool
 508 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 509                             unsigned HOST_WIDE_INT bitsize,
 510                             unsigned HOST_WIDE_INT bitnum,
 511                             rtx value)
 512 {
 513   struct expand_operand ops[4];
 514   rtx value1;
 515   rtx xop0 = op0;
       /* Remembered so that a failed attempt can be rolled back.  */
 516   rtx_insn *last = get_last_insn ();
 517   bool copy_back = false;
 518
 519   machine_mode op_mode = insv->field_mode;
 520   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 521   if (bitsize == 0 || bitsize > unit)
 522     return false;
 523
 524   if (MEM_P (xop0))
 525     /* Get a reference to the first byte of the field.  */
 526     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 527                                  &bitnum);
 528   else
 529     {
 530       /* Convert from counting within OP0 to counting in OP_MODE.  */
 531       if (BYTES_BIG_ENDIAN)
 532         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 533
 534       /* If xop0 is a register, we need it in OP_MODE
 535          to make it acceptable to the format of insv.  */
 536       if (GET_CODE (xop0) == SUBREG)
 537         /* We can't just change the mode, because this might clobber op0,
 538            and we will need the original value of op0 if insv fails.  */
 539         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 540       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 541         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 542     }
 543
 544   /* If the destination is a paradoxical subreg such that we need a
 545      truncate to the inner mode, perform the insertion on a temporary and
 546      truncate the result to the original destination.  Note that we can't
 547      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 548      X) 0)) is (reg:N X).  */
 549   if (GET_CODE (xop0) == SUBREG
 550       && REG_P (SUBREG_REG (xop0))
 551       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 552                                          op_mode))
 553     {
 554       rtx tem = gen_reg_rtx (op_mode);
 555       emit_move_insn (tem, xop0);
 556       xop0 = tem;
 557       copy_back = true;
 558     }
 559
 560   /* There is a similar overflow check at the start of store_bit_field_1,
 561      but it only covers the case where the field lies completely
 562      outside the register.  The field can also lie only partially in
 563      the register, and BITSIZE must then be reduced to the part that
 564      fits.  Without this fix, pr48335-2.c is broken on big-endian
 565      targets that have a bit-insert instruction, such as arm and
 566      aarch64.  */
 567   if (bitsize + bitnum > unit && bitnum < unit)
 568     {
 569       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
 570                "destination object, data truncated into %wu-bit",
 571                bitsize, unit - bitnum);
 572       bitsize = unit - bitnum;
 573     }
 574
 575   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 576      "backwards" from the size of the unit we are inserting into.
 577      Otherwise, we count bits from the most significant on a
 578      BYTES/BITS_BIG_ENDIAN machine.  */
 579
 580   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 581     bitnum = unit - bitsize - bitnum;
 582
 583   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 584   value1 = value;
 585   if (GET_MODE (value) != op_mode)
 586     {
 587       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 588         {
 589           /* Optimization: Don't bother really extending VALUE
 590              if it has all the bits we will actually use.  However,
 591              if we must narrow it, be sure we do it correctly.  */
 592
 593           if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 594             {
 595               rtx tmp;
 596
                   /* Try a subreg of VALUE1 as it stands; if that yields
                      nothing, force VALUE1 into a register and take the
                      subreg of that instead.  */
 597               tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 598               if (! tmp)
 599                 tmp = simplify_gen_subreg (op_mode,
 600                                            force_reg (GET_MODE (value),
 601                                                       value1),
 602                                            GET_MODE (value), 0);
 603               value1 = tmp;
 604             }
 605           else
 606             value1 = gen_lowpart (op_mode, value1);
 607         }
 608       else if (CONST_INT_P (value))
 609         value1 = gen_int_mode (INTVAL (value), op_mode);
 610       else
 611         /* Parse phase is supposed to make VALUE's data type
 612            match that of the component reference, which is a type
 613            at least as wide as the field; so VALUE should have
 614            a mode that corresponds to that type.  */
 615         gcc_assert (CONSTANT_P (value));
 616     }
 617
       /* Operands passed to the insv pattern, in order: destination,
          field width, bit position, value to insert.  */
 618   create_fixed_operand (&ops[0], xop0);
 619   create_integer_operand (&ops[1], bitsize);
 620   create_integer_operand (&ops[2], bitnum);
 621   create_input_operand (&ops[3], value1, op_mode);
 622   if (maybe_expand_insn (insv->icode, 4, ops))
 623     {
           /* The insertion was done on a temporary; move the result back
              into the original destination.  */
 624       if (copy_back)
 625         convert_move (op0, xop0, true);
 626       return true;
 627     }
       /* Expansion failed: discard everything emitted since entry.  */
 628   delete_insns_since (last);
 629   return false;
 630 }
 631
 632 /* A subroutine of store_bit_field, with the same arguments.  Return true
 633    if the operation could be implemented.
 634
 635    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 636    no other way of implementing the operation.  If FALLBACK_P is false,
 637    return false instead.  */
 638
 639 static bool
 640 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 641                    unsigned HOST_WIDE_INT bitnum,
 642                    unsigned HOST_WIDE_INT bitregion_start,
 643                    unsigned HOST_WIDE_INT bitregion_end,
 644                    machine_mode fieldmode,
 645                    rtx value, bool fallback_p)
 646 {
 647   rtx op0 = str_rtx;
 648   rtx orig_value;
 649
 650   while (GET_CODE (op0) == SUBREG)
 651     {
 652       /* The following line once was done only if WORDS_BIG_ENDIAN,
 653          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 654          meaningful at a much higher level; when structures are copied
 655          between memory and regs, the higher-numbered regs
 656          always get higher addresses.  */
 657       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 658       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 659       int byte_offset = 0;
 660
 661       /* Paradoxical subregs need special handling on big endian machines.  */
 662       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 663         {
 664           int difference = inner_mode_size - outer_mode_size;
 665
 666           if (WORDS_BIG_ENDIAN)
 667             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 668           if (BYTES_BIG_ENDIAN)
 669             byte_offset += difference % UNITS_PER_WORD;
 670         }
 671       else
 672         byte_offset = SUBREG_BYTE (op0);
 673
 674       bitnum += byte_offset * BITS_PER_UNIT;
 675       op0 = SUBREG_REG (op0);
 676     }
 677
 678   /* No action is needed if the target is a register and if the field
 679      lies completely outside that register.  This can occur if the source
 680      code contains an out-of-bounds access to a small array.  */
 681   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 682     return true;
 683
 684   /* Use vec_set patterns for inserting parts of vectors whenever
 685      available.  */
 686   if (VECTOR_MODE_P (GET_MODE (op0))
 687       && !MEM_P (op0)
 688       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 689       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 690       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 691       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 692     {
 693       struct expand_operand ops[3];
 694       machine_mode outermode = GET_MODE (op0);
 695       machine_mode innermode = GET_MODE_INNER (outermode);
 696       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 697       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 698
 699       create_fixed_operand (&ops[0], op0);
 700       create_input_operand (&ops[1], value, innermode);
 701       create_integer_operand (&ops[2], pos);
 702       if (maybe_expand_insn (icode, 3, ops))
 703         return true;
 704     }
 705
 706   /* If the target is a register, overwriting the entire object, or storing
 707      a full-word or multi-word field can be done with just a SUBREG.  */
 708   if (!MEM_P (op0)
 709       && bitsize == GET_MODE_BITSIZE (fieldmode)
 710       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 711           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 712     {
 713       /* Use the subreg machinery either to narrow OP0 to the required
 714          words or to cope with mode punning between equal-sized modes.
 715          In the latter case, use subreg on the rhs side, not lhs.  */
 716       rtx sub;
 717
 718       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 719         {
 720           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 721           if (sub)
 722             {
 723               emit_move_insn (op0, sub);
 724               return true;
 725             }
 726         }
 727       else
 728         {
 729           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 730                                      bitnum / BITS_PER_UNIT);
 731           if (sub)
 732             {
 733               emit_move_insn (sub, value);
 734               return true;
 735             }
 736         }
 737     }
 738
 739   /* If the target is memory, storing any naturally aligned field can be
 740      done with a simple store.  For targets that support fast unaligned
 741      memory, any naturally sized, unit aligned field can be done directly.  */
 742   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
 743     {
 744       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 745       emit_move_insn (op0, value);
 746       return true;
 747     }
 748
 749   /* Make sure we are playing with integral modes.  Pun with subregs
 750      if we aren't.  This must come after the entire register case above,
 751      since that case is valid for any mode.  The following cases are only
 752      valid for integral modes.  */
 753   {
 754     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 755     if (imode != GET_MODE (op0))
 756       {
 757         if (MEM_P (op0))
 758           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
 759         else
 760           {
 761             gcc_assert (imode != BLKmode);
 762             op0 = gen_lowpart (imode, op0);
 763           }
 764       }
 765   }
 766
 767   /* Storing an lsb-aligned field in a register
 768      can be done with a movstrict instruction.  */
 769
 770   if (!MEM_P (op0)
 771       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 772       && bitsize == GET_MODE_BITSIZE (fieldmode)
 773       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 774     {
 775       struct expand_operand ops[2];
 776       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 777       rtx arg0 = op0;
 778       unsigned HOST_WIDE_INT subreg_off;
 779
 780       if (GET_CODE (arg0) == SUBREG)
 781         {
 782           /* Else we've got some float mode source being extracted into
 783              a different float mode destination -- this combination of
 784              subregs results in Severe Tire Damage.  */
 785           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 786                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 787                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 788           arg0 = SUBREG_REG (arg0);
 789         }
 790
 791       subreg_off = bitnum / BITS_PER_UNIT;
 792       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 793         {
 794           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 795
 796           create_fixed_operand (&ops[0], arg0);
 797           /* Shrink the source operand to FIELDMODE.  */
 798           create_convert_operand_to (&ops[1], value, fieldmode, false);
 799           if (maybe_expand_insn (icode, 2, ops))
 800             return true;
 801         }
 802     }
 803
 804   /* Handle fields bigger than a word.  */
 805
 806   if (bitsize > BITS_PER_WORD)
 807     {
 808       /* Here we transfer the words of the field
 809          in the order least significant first.
 810          This is because the most significant word is the one which may
 811          be less than full.
 812          However, only do that if the value is not BLKmode.  */
 813
 814       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 815       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 816       unsigned int i;
 817       rtx_insn *last;
 818
 819       /* This is the mode we must force value to, so that there will be enough
 820          subwords to extract.  Note that fieldmode will often (always?) be
 821          VOIDmode, because that is what store_field uses to indicate that this
 822          is a bit field, but passing VOIDmode to operand_subword_force
 823          is not allowed.  */
 824       fieldmode = GET_MODE (value);
 825       if (fieldmode == VOIDmode)
 826         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 827
 828       last = get_last_insn ();
 829       for (i = 0; i < nwords; i++)
 830         {
 831           /* If I is 0, use the low-order word in both field and target;
 832              if I is 1, use the next to lowest word; and so on.  */
 833           unsigned int wordnum = (backwards
 834                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 835                                   - i - 1
 836                                   : i);
 837           unsigned int bit_offset = (backwards
 838                                      ? MAX ((int) bitsize - ((int) i + 1)
 839                                             * BITS_PER_WORD,
 840                                             0)
 841                                      : (int) i * BITS_PER_WORD);
 842           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 843           unsigned HOST_WIDE_INT new_bitsize =
 844             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 845
 846           /* If the remaining chunk doesn't have full wordsize we have
 847              to make sure that for big endian machines the higher order
 848              bits are used.  */
 849           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 850             value_word = simplify_expand_binop (word_mode, lshr_optab,
 851                                                 value_word,
 852                                                 GEN_INT (BITS_PER_WORD
 853                                                          - new_bitsize),
 854                                                 NULL_RTX, true,
 855                                                 OPTAB_LIB_WIDEN);
 856
 857           if (!store_bit_field_1 (op0, new_bitsize,
 858                                   bitnum + bit_offset,
 859                                   bitregion_start, bitregion_end,
 860                                   word_mode,
 861                                   value_word, fallback_p))
 862             {
 863               delete_insns_since (last);
 864               return false;
 865             }
 866         }
 867       return true;
 868     }
 869
 870   /* If VALUE has a floating-point or complex mode, access it as an
 871      integer of the corresponding size.  This can occur on a machine
 872      with 64 bit registers that uses SFmode for float.  It can also
 873      occur for unaligned float or complex fields.  */
 874   orig_value = value;
 875   if (GET_MODE (value) != VOIDmode
 876       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 877       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 878     {
 879       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 880       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 881     }
 882
 883   /* If OP0 is a multi-word register, narrow it to the affected word.
 884      If the region spans two words, defer to store_split_bit_field.  */
 885   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 886     {
 887       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 888                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 889       gcc_assert (op0);
 890       bitnum %= BITS_PER_WORD;
 891       if (bitnum + bitsize > BITS_PER_WORD)
 892         {
 893           if (!fallback_p)
 894             return false;
 895
 896           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 897                                  bitregion_end, value);
 898           return true;
 899         }
 900     }
 901
 902   /* From here on we can assume that the field to be stored in fits
 903      within a word.  If the destination is a register, it too fits
 904      in a word.  */
 905
 906   extraction_insn insv;
 907   if (!MEM_P (op0)
 908       && get_best_reg_extraction_insn (&insv, EP_insv,
 909                                        GET_MODE_BITSIZE (GET_MODE (op0)),
 910                                        fieldmode)
 911       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 912     return true;
 913
 914   /* If OP0 is a memory, try copying it to a register and seeing if a
 915      cheap register alternative is available.  */
 916   if (MEM_P (op0))
 917     {
 918       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
 919                                         fieldmode)
 920           && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 921         return true;
 922
 923       rtx_insn *last = get_last_insn ();
 924
 925       /* Try loading part of OP0 into a register, inserting the bitfield
 926          into that, and then copying the result back to OP0.  */
 927       unsigned HOST_WIDE_INT bitpos;
 928       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
 929                                                bitregion_start, bitregion_end,
 930                                                fieldmode, &bitpos);
 931       if (xop0)
 932         {
 933           rtx tempreg = copy_to_reg (xop0);
 934           if (store_bit_field_1 (tempreg, bitsize, bitpos,
 935                                  bitregion_start, bitregion_end,
 936                                  fieldmode, orig_value, false))
 937             {
 938               emit_move_insn (xop0, tempreg);
 939               return true;
 940             }
 941           delete_insns_since (last);
 942         }
 943     }
 944
 945   if (!fallback_p)
 946     return false;
 947
 948   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
 949                          bitregion_end, value);
 950   return true;
 951 }
 952
 953 /* Generate code to store value from rtx VALUE
 954    into a bit-field within structure STR_RTX
 955    containing BITSIZE bits starting at bit BITNUM.
 956
 957    BITREGION_START is bitpos of the first bitfield in this region.
 958    BITREGION_END is the bitpos of the ending bitfield in this region.
 959    These two fields are 0 if the C++ memory model does not apply,
 960    or if we are not interested in keeping track of bitfield regions.
 961
 962    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */
 963 /* No value is returned; a store that cannot be done is unreachable.  */
 964 void
 965 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 966                  unsigned HOST_WIDE_INT bitnum,
 967                  unsigned HOST_WIDE_INT bitregion_start,
 968                  unsigned HOST_WIDE_INT bitregion_end,
 969                  machine_mode fieldmode,
 970                  rtx value)
 971 {
 972   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
 973   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
 974                                   bitregion_start, bitregion_end))
 975     {
 976       /* Storing any naturally aligned field can be done with a simple
 977          store.  For targets that support fast unaligned memory, any
 978          naturally sized, unit aligned field can be done directly.  */
 979       if (bitsize == GET_MODE_BITSIZE (fieldmode))
 980         {
 981           str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
 982                                              bitnum / BITS_PER_UNIT);
 983           emit_move_insn (str_rtx, value);
 984         }
 985       else
 986         {
 987           rtx temp;
 988           /* Insert into a register copy, then write it back.  */
 989           str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
 990                                           &bitnum);
 991           temp = copy_to_reg (str_rtx);
 992           if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
 993                                   fieldmode, value, true))
 994             gcc_unreachable ();
 995           /* Store the updated register copy back to STR_RTX.  */
 996           emit_move_insn (str_rtx, temp);
 997         }
 998
 999       return;
1000     }
1001
1002   /* Under the C++0x memory model, we must not touch bits outside the
1003      bit region.  Adjust the address to start at the beginning of the
1004      bit region.  */
1005   if (MEM_P (str_rtx) && bitregion_start > 0)
1006     {
1007       machine_mode bestmode;
1008       HOST_WIDE_INT offset, size;
1009
1010       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1011       /* Rebase BITNUM and the region bounds to the region start.  */
1012       offset = bitregion_start / BITS_PER_UNIT;
1013       bitnum -= bitregion_start;
1014       size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1015       bitregion_end -= bitregion_start;
1016       bitregion_start = 0;
1017       bestmode = get_best_mode (bitsize, bitnum,
1018                                 bitregion_start, bitregion_end,
1019                                 MEM_ALIGN (str_rtx), VOIDmode,
1020                                 MEM_VOLATILE_P (str_rtx));
1021       str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1022     }
1023   /* Do the store; with fallback enabled this must succeed.  */
1024   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1025                           bitregion_start, bitregion_end,
1026                           fieldmode, value, true))
1027     gcc_unreachable ();
1028 }
1029 \f
1030 /* Use shifts and boolean operations to store VALUE into a bit field of
1031    width BITSIZE in OP0, starting at bit BITNUM.  */
1032 /* BITREGION_START and BITREGION_END are only used when OP0 is a MEM.  */
1033 static void
1034 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1035                        unsigned HOST_WIDE_INT bitnum,
1036                        unsigned HOST_WIDE_INT bitregion_start,
1037                        unsigned HOST_WIDE_INT bitregion_end,
1038                        rtx value)
1039 {
1040   /* There is a case not handled here:
1041      a structure with a known alignment of just a halfword
1042      and a field split across two aligned halfwords within the structure.
1043      Or likewise a structure with a known alignment of just a byte
1044      and a field split across two bytes.
1045      Such cases are not supposed to be able to occur.  */
1046
1047   if (MEM_P (op0))
1048     {
1049       machine_mode mode = GET_MODE (op0);
1050       if (GET_MODE_BITSIZE (mode) == 0
1051           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1052         mode = word_mode;
1053       mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1054                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1055
1056       if (mode == VOIDmode)
1057         {
1058           /* The only way this should occur is if the field spans word
1059              boundaries.  */
1060           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1061                                  bitregion_end, value);
1062           return;
1063         }
1064       /* Narrow OP0 to MODE; this also updates BITNUM.  */
1065       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1066     }
1067   /* A non-MEM OP0 reaches this point unchanged.  */
1068   store_fixed_bit_field_1 (op0, bitsize, bitnum, value);
1069 }
1070
1071 /* Helper function for store_fixed_bit_field, stores
1072    the bit field always using the MODE of OP0.  */
1073 /* OP0's mode must be a scalar integer mode (asserted below).  */
1074 static void
1075 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1076                          unsigned HOST_WIDE_INT bitnum,
1077                          rtx value)
1078 {
1079   machine_mode mode;
1080   rtx temp;
1081   int all_zero = 0;
1082   int all_one = 0;
1083
1084   mode = GET_MODE (op0);
1085   gcc_assert (SCALAR_INT_MODE_P (mode));
1086
1087   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1088      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1089
1090   if (BYTES_BIG_ENDIAN)
1091     /* BITNUM is the distance between our msb
1092        and that of the containing datum.
1093        Convert it to the distance from the lsb.  */
1094     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1095
1096   /* Now BITNUM is always the distance between our lsb
1097      and that of OP0.  */
1098
1099   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1100      we must first convert its mode to MODE.  */
1101
1102   if (CONST_INT_P (value))
1103     {
1104       unsigned HOST_WIDE_INT v = UINTVAL (value);
1105       /* Keep only the low BITSIZE bits of the constant.  */
1106       if (bitsize < HOST_BITS_PER_WIDE_INT)
1107         v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;
1108       /* Note all-zero / all-one fields; they let us skip an IOR or AND.  */
1109       if (v == 0)
1110         all_zero = 1;
1111       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1112                 && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
1113                || (bitsize == HOST_BITS_PER_WIDE_INT
1114                    && v == (unsigned HOST_WIDE_INT) -1))
1115         all_one = 1;
1116
1117       value = lshift_value (mode, v, bitnum);
1118     }
1119   else
1120     {
1121       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1122                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1123       /* MUST_AND was computed from VALUE's mode before this conversion.  */
1124       if (GET_MODE (value) != mode)
1125         value = convert_to_mode (mode, value, 1);
1126
1127       if (must_and)
1128         value = expand_binop (mode, and_optab, value,
1129                               mask_rtx (mode, 0, bitsize, 0),
1130                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1131       if (bitnum > 0)
1132         value = expand_shift (LSHIFT_EXPR, mode, value,
1133                               bitnum, NULL_RTX, 1);
1134     }
1135
1136   /* Now clear the chosen bits in OP0,
1137      except that if VALUE is -1 we need not bother.  */
1138   /* We keep the intermediates in registers to allow CSE to combine
1139      consecutive bitfield assignments.  */
1140
1141   temp = force_reg (mode, op0);
1142
1143   if (! all_one)
1144     {
1145       temp = expand_binop (mode, and_optab, temp,
1146                            mask_rtx (mode, bitnum, bitsize, 1),
1147                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1148       temp = force_reg (mode, temp);
1149     }
1150
1151   /* Now logical-or VALUE into OP0, unless it is zero.  */
1152
1153   if (! all_zero)
1154     {
1155       temp = expand_binop (mode, ior_optab, temp, value,
1156                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1157       temp = force_reg (mode, temp);
1158     }
1159   /* Copy the result back unless it is already OP0 itself.  */
1160   if (op0 != temp)
1161     {
1162       op0 = copy_rtx (op0);
1163       emit_move_insn (op0, temp);
1164     }
1165 }
1166 \f
1167 /* Store a bit field that is split across multiple accessible memory objects.
1168
1169    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1170    BITSIZE is the field width; BITPOS the position of its first bit
1171    (within the word).
1172    VALUE is the value to store.
1173
1174    This does not yet handle fields wider than BITS_PER_WORD.  */
1175 /* A nonzero BITREGION_END limits how far a memory access may reach.  */
1176 static void
1177 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1178                        unsigned HOST_WIDE_INT bitpos,
1179                        unsigned HOST_WIDE_INT bitregion_start,
1180                        unsigned HOST_WIDE_INT bitregion_end,
1181                        rtx value)
1182 {
1183   unsigned int unit;
1184   unsigned int bitsdone = 0;
1185   /* UNIT is the access chunk width in bits; BITSDONE counts stored bits.  */
1186   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1187      much at a time.  */
1188   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1189     unit = BITS_PER_WORD;
1190   else
1191     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1192
1193   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1194      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1195      again, and we will mutually recurse forever.  */
1196   if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1197     unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1198
1199   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1200      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1201      that VALUE might be a floating-point constant.  */
1202   if (CONSTANT_P (value) && !CONST_INT_P (value))
1203     {
1204       rtx word = gen_lowpart_common (word_mode, value);
1205
1206       if (word && (value != word))
1207         value = word;
1208       else
1209         value = gen_lowpart_common (word_mode,
1210                                     force_reg (GET_MODE (value) != VOIDmode
1211                                                ? GET_MODE (value)
1212                                                : word_mode, value));
1213     }
1214
1215   while (bitsdone < bitsize)
1216     {
1217       unsigned HOST_WIDE_INT thissize;
1218       rtx part, word;
1219       unsigned HOST_WIDE_INT thispos;
1220       unsigned HOST_WIDE_INT offset;
1221       /* Locate the next piece: chunk index OFFSET, bit THISPOS within it.  */
1222       offset = (bitpos + bitsdone) / unit;
1223       thispos = (bitpos + bitsdone) % unit;
1224
1225       /* When region of bytes we can touch is restricted, decrease
1226          UNIT close to the end of the region as needed.  If op0 is a REG
1227          or SUBREG of REG, don't do this, as there can't be data races
1228          on a register and we can expand shorter code in some cases.  */
1229       if (bitregion_end
1230           && unit > BITS_PER_UNIT
1231           && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1232           && !REG_P (op0)
1233           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1234         {
1235           unit = unit / 2;
1236           continue;
1237         }
1238
1239       /* THISSIZE must not overrun a word boundary.  Otherwise,
1240          store_fixed_bit_field will call us again, and we will mutually
1241          recurse forever.  */
1242       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1243       thissize = MIN (thissize, unit - thispos);
1244       /* Carve the next THISSIZE bits out of VALUE into PART.  */
1245       if (BYTES_BIG_ENDIAN)
1246         {
1247           /* Fetch successively less significant portions.  */
1248           if (CONST_INT_P (value))
1249             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1250                              >> (bitsize - bitsdone - thissize))
1251                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1252           else
1253             {
1254               int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1255               /* The args are chosen so that the last part includes the
1256                  lsb.  Give extract_bit_field the value it needs (with
1257                  endianness compensation) to fetch the piece we want.  */
1258               part = extract_fixed_bit_field (word_mode, value, thissize,
1259                                               total_bits - bitsize + bitsdone,
1260                                               NULL_RTX, 1);
1261             }
1262         }
1263       else
1264         {
1265           /* Fetch successively more significant portions.  */
1266           if (CONST_INT_P (value))
1267             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1268                              >> bitsdone)
1269                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1270           else
1271             part = extract_fixed_bit_field (word_mode, value, thissize,
1272                                             bitsdone, NULL_RTX, 1);
1273         }
1274
1275       /* If OP0 is a register, then handle OFFSET here.
1276
1277          When handling multiword bitfields, extract_bit_field may pass
1278          down a word_mode SUBREG of a larger REG for a bitfield that actually
1279          crosses a word boundary.  Thus, for a SUBREG, we must find
1280          the current word starting from the base register.  */
1281       if (GET_CODE (op0) == SUBREG)
1282         {
1283           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1284                             + (offset * unit / BITS_PER_WORD);
1285           machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1286           if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1287             word = word_offset ? const0_rtx : op0;
1288           else
1289             word = operand_subword_force (SUBREG_REG (op0), word_offset,
1290                                           GET_MODE (SUBREG_REG (op0)));
1291           offset &= BITS_PER_WORD / unit - 1;
1292         }
1293       else if (REG_P (op0))
1294         {
1295           machine_mode op0_mode = GET_MODE (op0);
1296           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1297             word = offset ? const0_rtx : op0;
1298           else
1299             word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1300                                           GET_MODE (op0));
1301           offset &= BITS_PER_WORD / unit - 1;
1302         }
1303       else
1304         word = op0;
1305
1306       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1307          it is just an out-of-bounds access.  Ignore it.  */
1308       if (word != const0_rtx)
1309         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1310                                bitregion_start, bitregion_end, part);
1311       bitsdone += thissize;
1312     }
1313 }
1314 \f
1315 /* A subroutine of extract_bit_field_1 that converts return value X
1316    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1317    to extract_bit_field.  */
1318 /* X is returned unchanged if it already has mode MODE or TMODE.  */
1319 static rtx
1320 convert_extracted_bit_field (rtx x, machine_mode mode,
1321                              machine_mode tmode, bool unsignedp)
1322 {
1323   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1324     return x;
1325
1326   /* If TMODE is not a scalar integral mode, first convert X to the
1327      integer mode of TMODE's size and then reinterpret it as TMODE
1328      with gen_lowpart.  */
1329   if (!SCALAR_INT_MODE_P (tmode))
1330     {
1331       machine_mode smode;
1332
1333       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1334       x = convert_to_mode (smode, x, unsignedp);
1335       x = force_reg (smode, x);
1336       return gen_lowpart (tmode, x);
1337     }
1338
1339   return convert_to_mode (tmode, x, unsignedp);
1340 }
1341
1342 /* Try to use the ext(z)v pattern described by EXTV to extract a field
1343    from OP0.  Return the extracted value on success, otherwise return
1344    null.  EXTV->field_mode (EXT_MODE below) is the mode of the extraction;
1345    the other arguments are as for extract_bit_field.  */
1346
1347 static rtx
1348 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1349                               unsigned HOST_WIDE_INT bitsize,
1350                               unsigned HOST_WIDE_INT bitnum,
1351                               int unsignedp, rtx target,
1352                               machine_mode mode, machine_mode tmode)
1353 {
1354   struct expand_operand ops[4];
1355   rtx spec_target = target;
1356   rtx spec_target_subreg = 0;
1357   machine_mode ext_mode = extv->field_mode;
1358   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1359   /* An empty field, or one wider than EXT_MODE, cannot be handled.  */
1360   if (bitsize == 0 || unit < bitsize)
1361     return NULL_RTX;
1362
1363   if (MEM_P (op0))
1364     /* Get a reference to the first byte of the field.  */
1365     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1366                                 &bitnum);
1367   else
1368     {
1369       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1370       if (BYTES_BIG_ENDIAN)
1371         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1372
1373       /* If op0 is a register, we need it in EXT_MODE to make it
1374          acceptable to the format of ext(z)v.  */
1375       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1376         return NULL_RTX;
1377       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1378         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1379     }
1380
1381   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1382      "backwards" from the size of the unit we are extracting from.
1383      Otherwise, we count bits from the most significant on a
1384      BYTES/BITS_BIG_ENDIAN machine.  */
1385
1386   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1387     bitnum = unit - bitsize - bitnum;
1388   /* With no TARGET supplied, extract into a fresh TMODE pseudo.  */
1389   if (target == 0)
1390     target = spec_target = gen_reg_rtx (tmode);
1391
1392   if (GET_MODE (target) != ext_mode)
1393     {
1394       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1395          between the mode of the extraction (word_mode) and the target
1396          mode.  Instead, create a temporary and use convert_move to set
1397          the target.  */
1398       if (REG_P (target)
1399           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1400         {
1401           target = gen_lowpart (ext_mode, target);
1402           if (GET_MODE_PRECISION (ext_mode)
1403               > GET_MODE_PRECISION (GET_MODE (spec_target)))
1404             spec_target_subreg = target;
1405         }
1406       else
1407         target = gen_reg_rtx (ext_mode);
1408     }
1409   /* Operands: destination, source, field width, field position.  */
1410   create_output_operand (&ops[0], target, ext_mode);
1411   create_fixed_operand (&ops[1], op0);
1412   create_integer_operand (&ops[2], bitsize);
1413   create_integer_operand (&ops[3], bitnum);
1414   if (maybe_expand_insn (extv->icode, 4, ops))
1415     {
1416       target = ops[0].value;
1417       if (target == spec_target)
1418         return target;
1419       if (target == spec_target_subreg)
1420         return spec_target;
1421       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1422     }
1423   return NULL_RTX;
1424 }
1425
1426 /* A subroutine of extract_bit_field, with the same arguments.
1427    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1428    if we can find no other means of implementing the operation.
1429    if FALLBACK_P is false, return NULL instead.  */
1430
1431 static rtx
1432 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1433                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1434                      machine_mode mode, machine_mode tmode,
1435                      bool fallback_p)
1436 {
1437   rtx op0 = str_rtx;
1438   machine_mode int_mode;
1439   machine_mode mode1;
1440
1441   if (tmode == VOIDmode)
1442     tmode = mode;
1443
1444   while (GET_CODE (op0) == SUBREG)
1445     {
1446       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1447       op0 = SUBREG_REG (op0);
1448     }
1449
1450   /* If we have an out-of-bounds access to a register, just return an
1451      uninitialized register of the required mode.  This can occur if the
1452      source code contains an out-of-bounds access to a small array.  */
1453   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1454     return gen_reg_rtx (tmode);
1455
1456   if (REG_P (op0)
1457       && mode == GET_MODE (op0)
1458       && bitnum == 0
1459       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1460     {
1461       /* We're trying to extract a full register from itself.  */
1462       return op0;
1463     }
1464
1465   /* See if we can get a better vector mode before extracting.  */
1466   if (VECTOR_MODE_P (GET_MODE (op0))
1467       && !MEM_P (op0)
1468       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1469     {
1470       machine_mode new_mode;
1471
1472       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1473         new_mode = MIN_MODE_VECTOR_FLOAT;
1474       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1475         new_mode = MIN_MODE_VECTOR_FRACT;
1476       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1477         new_mode = MIN_MODE_VECTOR_UFRACT;
1478       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1479         new_mode = MIN_MODE_VECTOR_ACCUM;
1480       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1481         new_mode = MIN_MODE_VECTOR_UACCUM;
1482       else
1483         new_mode = MIN_MODE_VECTOR_INT;
1484
1485       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1486         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1487             && targetm.vector_mode_supported_p (new_mode))
1488           break;
1489       if (new_mode != VOIDmode)
1490         op0 = gen_lowpart (new_mode, op0);
1491     }
1492
1493   /* Use vec_extract patterns for extracting parts of vectors whenever
1494      available.  */
1495   if (VECTOR_MODE_P (GET_MODE (op0))
1496       && !MEM_P (op0)
1497       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1498       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1499           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1500     {
1501       struct expand_operand ops[3];
1502       machine_mode outermode = GET_MODE (op0);
1503       machine_mode innermode = GET_MODE_INNER (outermode);
1504       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1505       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1506
1507       create_output_operand (&ops[0], target, innermode);
1508       create_input_operand (&ops[1], op0, outermode);
1509       create_integer_operand (&ops[2], pos);
1510       if (maybe_expand_insn (icode, 3, ops))
1511         {
1512           target = ops[0].value;
1513           if (GET_MODE (target) != mode)
1514             return gen_lowpart (tmode, target);
1515           return target;
1516         }
1517     }
1518
1519   /* Make sure we are playing with integral modes.  Pun with subregs
1520      if we aren't.  */
1521   {
1522     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1523     if (imode != GET_MODE (op0))
1524       {
1525         if (MEM_P (op0))
1526           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1527         else if (imode != BLKmode)
1528           {
1529             op0 = gen_lowpart (imode, op0);
1530
1531             /* If we got a SUBREG, force it into a register since we
1532                aren't going to be able to do another SUBREG on it.  */
1533             if (GET_CODE (op0) == SUBREG)
1534               op0 = force_reg (imode, op0);
1535           }
1536         else if (REG_P (op0))
1537           {
1538             rtx reg, subreg;
1539             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1540                                             MODE_INT);
1541             reg = gen_reg_rtx (imode);
1542             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1543             emit_move_insn (subreg, op0);
1544             op0 = reg;
1545             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1546           }
1547         else
1548           {
1549             HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1550             rtx mem = assign_stack_temp (GET_MODE (op0), size);
1551             emit_move_insn (mem, op0);
1552             op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1553           }
1554       }
1555   }
1556
1557   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1558      If that's wrong, the solution is to test for it and set TARGET to 0
1559      if needed.  */
1560
1561   /* Get the mode of the field to use for atomic access or subreg
1562      conversion.  */
1563   mode1 = mode;
1564   if (SCALAR_INT_MODE_P (tmode))
1565     {
1566       machine_mode try_mode = mode_for_size (bitsize,
1567                                                   GET_MODE_CLASS (tmode), 0);
1568       if (try_mode != BLKmode)
1569         mode1 = try_mode;
1570     }
1571   gcc_assert (mode1 != BLKmode);
1572
1573   /* Extraction of a full MODE1 value can be done with a subreg as long
1574      as the least significant bit of the value is the least significant
1575      bit of either OP0 or a word of OP0.  */
1576   if (!MEM_P (op0)
1577       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1578       && bitsize == GET_MODE_BITSIZE (mode1)
1579       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1580     {
1581       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1582                                      bitnum / BITS_PER_UNIT);
1583       if (sub)
1584         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1585     }
1586
1587   /* Extraction of a full MODE1 value can be done with a load as long as
1588      the field is on a byte boundary and is sufficiently aligned.  */
1589   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1590     {
1591       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1592       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1593     }
1594
1595   /* Handle fields bigger than a word.  */
1596
1597   if (bitsize > BITS_PER_WORD)
1598     {
1599       /* Here we transfer the words of the field
1600          in the order least significant first.
1601          This is because the most significant word is the one which may
1602          be less than full.  */
1603
1604       unsigned int backwards = WORDS_BIG_ENDIAN;
1605       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1606       unsigned int i;
1607       rtx_insn *last;
1608
1609       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1610         target = gen_reg_rtx (mode);
1611
1612       /* Indicate for flow that the entire target reg is being set.  */
1613       emit_clobber (target);
1614
1615       last = get_last_insn ();
1616       for (i = 0; i < nwords; i++)
1617         {
1618           /* If I is 0, use the low-order word in both field and target;
1619              if I is 1, use the next to lowest word; and so on.  */
1620           /* Word number in TARGET to use.  */
1621           unsigned int wordnum
1622             = (backwards
1623                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1624                : i);
1625           /* Offset from start of field in OP0.  */
1626           unsigned int bit_offset = (backwards
1627                                      ? MAX ((int) bitsize - ((int) i + 1)
1628                                             * BITS_PER_WORD,
1629                                             0)
1630                                      : (int) i * BITS_PER_WORD);
1631           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1632           rtx result_part
1633             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1634                                              bitsize - i * BITS_PER_WORD),
1635                                    bitnum + bit_offset, 1, target_part,
1636                                    mode, word_mode, fallback_p);
1637
1638           gcc_assert (target_part);
1639           if (!result_part)
1640             {
1641               delete_insns_since (last);
1642               return NULL;
1643             }
1644
1645           if (result_part != target_part)
1646             emit_move_insn (target_part, result_part);
1647         }
1648
1649       if (unsignedp)
1650         {
1651           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1652              need to be zero'd out.  */
1653           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1654             {
1655               unsigned int i, total_words;
1656
1657               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1658               for (i = nwords; i < total_words; i++)
1659                 emit_move_insn
1660                   (operand_subword (target,
1661                                     backwards ? total_words - i - 1 : i,
1662                                     1, VOIDmode),
1663                    const0_rtx);
1664             }
1665           return target;
1666         }
1667
1668       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1669       target = expand_shift (LSHIFT_EXPR, mode, target,
1670                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1671       return expand_shift (RSHIFT_EXPR, mode, target,
1672                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1673     }
1674
1675   /* If OP0 is a multi-word register, narrow it to the affected word.
1676      If the region spans two words, defer to extract_split_bit_field.  */
1677   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1678     {
1679       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1680                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1681       bitnum %= BITS_PER_WORD;
1682       if (bitnum + bitsize > BITS_PER_WORD)
1683         {
1684           if (!fallback_p)
1685             return NULL_RTX;
1686           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1687           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1688         }
1689     }
1690
1691   /* From here on we know the desired field is smaller than a word.
1692      If OP0 is a register, it too fits within a word.  */
1693   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1694   extraction_insn extv;
1695   if (!MEM_P (op0)
1696       /* ??? We could limit the structure size to the part of OP0 that
1697          contains the field, with appropriate checks for endianness
1698          and TRULY_NOOP_TRUNCATION.  */
1699       && get_best_reg_extraction_insn (&extv, pattern,
1700                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1701                                        tmode))
1702     {
1703       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1704                                                  unsignedp, target, mode,
1705                                                  tmode);
1706       if (result)
1707         return result;
1708     }
1709
1710   /* If OP0 is a memory, try copying it to a register and seeing if a
1711      cheap register alternative is available.  */
1712   if (MEM_P (op0))
1713     {
1714       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1715                                         tmode))
1716         {
1717           rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1718                                                      bitnum, unsignedp,
1719                                                      target, mode,
1720                                                      tmode);
1721           if (result)
1722             return result;
1723         }
1724
1725       rtx_insn *last = get_last_insn ();
1726
1727       /* Try loading part of OP0 into a register and extracting the
1728          bitfield from that.  */
1729       unsigned HOST_WIDE_INT bitpos;
1730       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1731                                                0, 0, tmode, &bitpos);
1732       if (xop0)
1733         {
1734           xop0 = copy_to_reg (xop0);
1735           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1736                                             unsignedp, target,
1737                                             mode, tmode, false);
1738           if (result)
1739             return result;
1740           delete_insns_since (last);
1741         }
1742     }
1743
1744   if (!fallback_p)
1745     return NULL;
1746
1747   /* Find a correspondingly-sized integer field, so we can apply
1748      shifts and masks to it.  */
1749   int_mode = int_mode_for_mode (tmode);
1750   if (int_mode == BLKmode)
1751     int_mode = int_mode_for_mode (mode);
1752   /* Should probably push op0 out to memory and then do a load.  */
1753   gcc_assert (int_mode != BLKmode);
1754
1755   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1756                                     target, unsignedp);
1757   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1758 }
1759
1760 /* Emit code to read a bit-field out of STR_RTX and return an rtx for
1761    the extracted value.  The field is BITSIZE bits wide and starts at
1762    bit BITNUM.  TARGET, when nonzero, is a suggested place for the
1763    result.
1764
1765    STR_RTX is the object that holds the field (a REG or MEM).
1766    UNSIGNEDP is nonzero when the field is unsigned.
1767    MODE is the natural mode of the field value once extracted.
1768    TMODE is the mode the caller would prefer the value to have;
1769    the value may nevertheless be returned in MODE.
1770
1771    When a TARGET is supplied and storing into it costs nothing extra,
1772    the value is put there and TARGET is returned.
1773    Otherwise the result is a REG of mode TMODE or MODE, with TMODE
1774    preferred when both are equally easy.  */
1775
1776 rtx
1777 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1778                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1779                    machine_mode mode, machine_mode tmode)
1780 {
1781   /* Choose the mode for the -fstrict-volatile-bitfields test: that of
1782      STR_RTX if it has a nonzero size, else that of TARGET, else TMODE.  */
1783   machine_mode field_mode = tmode;
1784   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1785     field_mode = GET_MODE (str_rtx);
1786   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1787     field_mode = GET_MODE (target);
1788
1789   if (!strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, field_mode, 0, 0))
1790     return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1791                                 target, mode, tmode, true);
1792
1793   /* Strict volatile access.  A field exactly as wide as FIELD_MODE is read
1794      by re-addressing STR_RTX in that mode at the field's byte offset.  */
1795   if (bitsize == GET_MODE_BITSIZE (field_mode))
1796     {
1797       rtx loaded = adjust_bitfield_address (str_rtx, field_mode,
1798                                             bitnum / BITS_PER_UNIT);
1799       return convert_extracted_bit_field (loaded, mode, tmode, unsignedp);
1800     }
1801
1802   /* Otherwise let narrow_bit_field_mem adjust the reference and BITNUM,
1803      copy the result to a register and extract the field from there.  */
1804   str_rtx = narrow_bit_field_mem (str_rtx, field_mode, bitsize, bitnum,
1805                                   &bitnum);
1806   str_rtx = copy_to_reg (str_rtx);
1807   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1808                               target, mode, tmode, true);
1809 }
1810 \f
1811 /* Extract a field of BITSIZE bits starting at bit BITNUM of OP0 by means
1812    of shifts and boolean operations.
1813
1814    UNSIGNEDP is nonzero for an unsigned bit field, whose value is not
1815    sign-extended.  If TARGET is nonzero, try to store the value there and
1816    return TARGET, though this is not guaranteed.  If TARGET is not used,
1817    the value is placed in a new pseudo-reg of mode TMODE.  */
1818
1819 static rtx
1820 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1821                          unsigned HOST_WIDE_INT bitsize,
1822                          unsigned HOST_WIDE_INT bitnum, rtx target,
1823                          int unsignedp)
1824 {
1825   /* Anything that is not a MEM can be handed to the helper as it is.  */
1826   if (!MEM_P (op0))
1827     return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1828                                       target, unsignedp);
1829
1830   machine_mode access_mode
1831     = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1832                      MEM_VOLATILE_P (op0));
1833
1834   /* get_best_mode found no mode.  That should only happen for a field
1835      that spans word boundaries, so use the split-field routine.  */
1836   if (access_mode == VOIDmode)
1837     return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1838   op0 = narrow_bit_field_mem (op0, access_mode, bitsize, bitnum, &bitnum);
1839   return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1840                                     target, unsignedp);
1841 }
1842
1843 /* Helper function for extract_fixed_bit_field.  Extract the BITSIZE-bit field
1844    at bit BITNUM of OP0 with shifts and masks, always using the MODE of OP0.  */
1845 /* UNSIGNEDP nonzero means zero-extend the field; otherwise sign-extend it.  */
1846 static rtx
1847 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1848                            unsigned HOST_WIDE_INT bitsize,
1849                            unsigned HOST_WIDE_INT bitnum, rtx target,
1850                            int unsignedp)
1851 {
1852   machine_mode mode = GET_MODE (op0);
1853   gcc_assert (SCALAR_INT_MODE_P (mode));
1854
1855   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1856      for invalid input, such as extract equivalent of f5 from
1857      gcc.dg/pr48335-2.c.  */
1858
1859   if (BYTES_BIG_ENDIAN)
1860     /* BITNUM is the distance between our msb and that of OP0.
1861        Convert it to the distance from the lsb.  */
1862     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1863
1864   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1865      We have reduced the big-endian case to the little-endian case.  */
1866
1867   if (unsignedp)
1868     {
1869       if (bitnum)
1870         {
1871           /* If the field does not already start at the lsb,
1872              shift it so it does.  */
1873           /* Maybe propagate the target for the shift.  */
1874           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1875           if (tmode != mode)
1876             subtarget = 0;  /* Presumably TARGET is in TMODE, not MODE.  */
1877           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1878         }
1879       /* Convert the value to the desired mode.  */
1880       if (mode != tmode)
1881         op0 = convert_to_mode (tmode, op0, 1);
1882
1883       /* Unless the msb of the field used to be the msb when we shifted,
1884          mask out the upper bits.  The mask keeps the low BITSIZE bits.  */
1885
1886       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1887         return expand_binop (GET_MODE (op0), and_optab, op0,
1888                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1889                              target, 1, OPTAB_LIB_WIDEN);
1890       return op0;
1891     }
1892   /* Signed case.  The result is in the mode picked below, not always TMODE.  */
1893   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1894      then arithmetic-shift its lsb to the lsb of the word.  */
1895   op0 = force_reg (mode, op0);
1896
1897   /* Find the narrowest integer mode that contains the field.  */
1898
1899   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1900        mode = GET_MODE_WIDER_MODE (mode))
1901     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1902       {
1903         op0 = convert_to_mode (mode, op0, 0);
1904         break;
1905       }
1906   /* NOTE(review): if no integer mode is wide enough, MODE is VOIDmode here.  */
1907   if (mode != tmode)
1908     target = 0;  /* TARGET is only reused when the shifts are done in TMODE.  */
1909   /* Move the field's msb up to the msb of MODE, unless it is already there.  */
1910   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1911     {
1912       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1913       /* Maybe propagate the target for the shift.  */
1914       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1915       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1916     }
1917   /* The arithmetic right shift moves the lsb down and copies the sign bit.  */
1918   return expand_shift (RSHIFT_EXPR, mode, op0,
1919                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1920 }
1921
1922 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1923    VALUE << BITPOS, built for mode MODE by immed_wide_int_const.  */
1924 /* NOTE(review): the exact rtx code is up to immed_wide_int_const -- confirm.  */
1925 static rtx
1926 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
1927               int bitpos)
1928 {
1929   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
1930 }
1931 \f
1932 /* Extract a bit field that is split across two words
1933    and return an RTX for the result, which is computed in word_mode.
1934
1935    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1936    BITSIZE is the field width; BITPOS is the position of its first bit in
1937    that word.  UNSIGNEDP is 1 to zero-extend the contents, else sign-extend.  */
1938
1939 static rtx
1940 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1941                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1942 {
1943   unsigned int unit;  /* Bits per chunk; no extracted part crosses a chunk.  */
1944   unsigned int bitsdone = 0;  /* Bits of the field extracted so far.  */
1945   rtx result = NULL_RTX;
1946   int first = 1;  /* Nonzero until the first part is stored in RESULT.  */
1947
1948   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1949      much at a time.  */
1950   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1951     unit = BITS_PER_WORD;
1952   else
1953     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1954
1955   while (bitsdone < bitsize)
1956     {
1957       unsigned HOST_WIDE_INT thissize;  /* Bits extracted this iteration.  */
1958       rtx part, word;
1959       unsigned HOST_WIDE_INT thispos;  /* Bit position within the unit.  */
1960       unsigned HOST_WIDE_INT offset;  /* Index of the unit within OP0.  */
1961
1962       offset = (bitpos + bitsdone) / unit;
1963       thispos = (bitpos + bitsdone) % unit;
1964
1965       /* THISSIZE must not overrun a word boundary.  Otherwise,
1966          extract_fixed_bit_field will call us again, and we will mutually
1967          recurse forever.  */
1968       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1969       thissize = MIN (thissize, unit - thispos);
1970
1971       /* If OP0 is a register, then handle OFFSET here.
1972
1973          When handling multiword bitfields, extract_bit_field may pass
1974          down a word_mode SUBREG of a larger REG for a bitfield that actually
1975          crosses a word boundary.  Thus, for a SUBREG, we must find
1976          the current word starting from the base register.  */
1977       if (GET_CODE (op0) == SUBREG)
1978         {
1979           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1980           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1981                                         GET_MODE (SUBREG_REG (op0)));
1982           offset = 0;
1983         }
1984       else if (REG_P (op0))
1985         {
1986           word = operand_subword_force (op0, offset, GET_MODE (op0));
1987           offset = 0;
1988         }
1989       else
1990         word = op0;  /* For a MEM, OFFSET is applied in the call below.  */
1991
1992       /* Extract the parts in bit-counting order, whose meaning is
1993          determined by byte endianness (BYTES_BIG_ENDIAN, tested below).
1994          OFFSET is in UNITs, and UNIT is in bits.  */
1995       part = extract_fixed_bit_field (word_mode, word, thissize,
1996                                       offset * unit + thispos, 0, 1);
1997       bitsdone += thissize;
1998
1999       /* Shift this part into place for the result.  */
2000       if (BYTES_BIG_ENDIAN)
2001         {
2002           if (bitsize != bitsdone)  /* More parts follow below this one.  */
2003             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2004                                  bitsize - bitsdone, 0, 1);
2005         }
2006       else
2007         {
2008           if (bitsdone != thissize)  /* Not the first part.  */
2009             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2010                                  bitsdone - thissize, 0, 1);
2011         }
2012
2013       if (first)
2014         result = part;
2015       else
2016         /* Combine the parts with bitwise or.  This works
2017            because we extracted each part as an unsigned bit field.  */
2018         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2019                                OPTAB_LIB_WIDEN);
2020
2021       first = 0;
2022     }
2023
2024   /* Unsigned bit field: we are done.  */
2025   if (unsignedp)
2026     return result;
2027   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2028   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2029                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2030   return expand_shift (RSHIFT_EXPR, word_mode, result,
2031                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2032 }
2033 \f
2034 /* Compute the low bits of SRC as an rvalue of mode MODE, keeping the bit
2035    pattern intact.  SRC_MODE is the mode of SRC; when it is narrower than
2036    MODE, the upper bits of the result are zero.  The attempt fails when
2037    the layout of either mode is unknown (as it is for CC modes) or when
2038    it would require unprofitable mode punning.  The value is returned on
2039    success and null on failure.
2040
2041    The routine differs from gen_lowpart* in the following ways:
2042
2043      - the result must always be treated as an rvalue
2044
2045      - a MODE wider than SRC_MODE makes the extraction
2046        a zero extension
2047
2048      - a MODE narrower than SRC_MODE makes the extraction
2049        a truncation (so TRULY_NOOP_TRUNCATION applies to it).
2050
2051    Put differently, this routine carries out a computation, while the
2052    gen_lowpart* routines conceptually form an lvalue or rvalue
2053    subreg.  */
2054
2055 rtx
2056 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2057 {
2058   /* Identical modes need no conversion at all.  */
2059   if (mode == src_mode)
2060     return src;
2061
2062   if (CONSTANT_P (src))
2063     {
2064       /* Call simplify_subreg directly instead of simplify_gen_subreg:
2065          when simplification fails the latter happily creates invalid
2066          SUBREGs such as (subreg (symbol_ref)).  */
2067       unsigned int low_byte = subreg_lowpart_offset (mode, src_mode);
2068       rtx folded = simplify_subreg (mode, src, src_mode, low_byte);
2069       if (folded)
2070         return folded;
2071
2072       if (GET_MODE (src) != VOIDmode
2073           && validate_subreg (mode, src_mode, src, low_byte))
2074         {
2075           src = force_reg (GET_MODE (src), src);
2076           return gen_rtx_SUBREG (mode, src, low_byte);
2077         }
2078       return NULL_RTX;
2079     }
2080
2081   /* Condition-code modes are rejected outright.  */
2082   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2083     return NULL_RTX;
2084
2085   /* For tieable modes of equal size, try a plain lowpart first.  */
2086   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2087       && MODES_TIEABLE_P (mode, src_mode))
2088     {
2089       rtx direct = gen_lowpart_common (mode, src);
2090       if (direct)
2091         return direct;
2092     }
2093
2094   /* Otherwise go through the matching integer modes, if both are tieable.  */
2095   machine_mode src_int_mode = int_mode_for_mode (src_mode);
2096   machine_mode int_mode = int_mode_for_mode (mode);
2097   if (src_int_mode == BLKmode || int_mode == BLKmode)
2098     return NULL_RTX;
2099   if (!MODES_TIEABLE_P (src_int_mode, src_mode)
2100       || !MODES_TIEABLE_P (int_mode, mode))
2101     return NULL_RTX;
2102
2103   rtx as_int = gen_lowpart (src_int_mode, src);
2104   rtx resized = convert_modes (int_mode, src_int_mode, as_int, true);
2105   return gen_lowpart (mode, resized);
2106 }
2107 \f
2108 /* Add INC into TARGET, copying the sum back if it was computed elsewhere.  */
2109
2110 void
2111 expand_inc (rtx target, rtx inc)
2112 {
2113   rtx sum = expand_binop (GET_MODE (target), add_optab, target, inc,
2114                           target, 0, OPTAB_LIB_WIDEN);
2115   if (sum == target)
2116     return;
2117   emit_move_insn (target, sum);
2118 }
2119
2120 /* Subtract DEC from TARGET, copying the difference back if needed.  */
2121
2122 void
2123 expand_dec (rtx target, rtx dec)
2124 {
2125   rtx diff = expand_binop (GET_MODE (target), sub_optab, target, dec,
2126                            target, 0, OPTAB_LIB_WIDEN);
2127   if (diff == target)
2128     return;
2129   emit_move_insn (target, diff);
2130 }
2131 \f
2132 /* Output a shift or rotate instruction for expression code CODE (LSHIFT_EXPR,
2133    RSHIFT_EXPR, LROTATE_EXPR or RROTATE_EXPR), performed in mode MODE,
2134    with SHIFTED being the rtx for the value to shift and AMOUNT the rtx
2135    for the amount to shift by.  Store the result in the rtx TARGET, if that
2136    is convenient.  If UNSIGNEDP is nonzero, do a logical shift; otherwise,
2137    arithmetic.  Return the rtx for where the value is.  */
2138
2139 static rtx
2140 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2141                 rtx amount, rtx target, int unsignedp)
2142 {
2143   rtx op1, temp = 0;
2144   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2145   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2146   optab lshift_optab = ashl_optab;
2147   optab rshift_arith_optab = ashr_optab;
2148   optab rshift_uns_optab = lshr_optab;
2149   optab lrotate_optab = rotl_optab;
2150   optab rrotate_optab = rotr_optab;
2151   machine_mode op1_mode;
2152   machine_mode scalar_mode = mode;
2153   int attempt;
2154   bool speed = optimize_insn_for_speed_p ();
2155   /* For a vector MODE, shift counts are checked against its inner mode.  */
2156   if (VECTOR_MODE_P (mode))
2157     scalar_mode = GET_MODE_INNER (mode);
2158   op1 = amount;
2159   op1_mode = GET_MODE (op1);
2160
2161   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2162      shift amount is a vector, use the vector/vector shift patterns.  */
2163   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2164     {
2165       lshift_optab = vashl_optab;
2166       rshift_arith_optab = vashr_optab;
2167       rshift_uns_optab = vlshr_optab;
2168       lrotate_optab = vrotl_optab;
2169       rrotate_optab = vrotr_optab;
2170     }
2171
2172   /* Previously detected shift-counts computed by NEGATE_EXPR
2173      and shifted in the other direction; but that does not work
2174      on all machines.  */
2175   /* With truncated counts, reduce constants and drop lowpart SUBREGs.  */
2176   if (SHIFT_COUNT_TRUNCATED)
2177     {
2178       if (CONST_INT_P (op1)
2179           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2180               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2181         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2182                        % GET_MODE_BITSIZE (scalar_mode));
2183       else if (GET_CODE (op1) == SUBREG
2184                && subreg_lowpart_p (op1)
2185                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2186                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2187         op1 = SUBREG_REG (op1);
2188     }
2189
2190   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2191      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2192      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2193      amount instead.  */
2194   if (rotate
2195       && CONST_INT_P (op1)
2196       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2197                    GET_MODE_BITSIZE (scalar_mode) - 1))
2198     {
2199       op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2200       left = !left;
2201       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2202     }
2203
2204   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2205      Note that this is not the case for bigger values.  For instance a rotation
2206      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2207      0x04030201 (bswapsi).  */
2208   if (rotate
2209       && CONST_INT_P (op1)
2210       && INTVAL (op1) == BITS_PER_UNIT
2211       && GET_MODE_SIZE (scalar_mode) == 2
2212       && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2213     return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2214                                   unsignedp);
2215   /* A shift or rotate by zero leaves the value unchanged.  */
2216   if (op1 == const0_rtx)
2217     return shifted;
2218
2219   /* Check whether it is cheaper to implement a left shift by a constant
2220      bit count by a sequence of additions, each one doubling the value.  */
2221   if (code == LSHIFT_EXPR
2222       && CONST_INT_P (op1)
2223       && INTVAL (op1) > 0
2224       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2225       && INTVAL (op1) < MAX_BITS_PER_WORD
2226       && (shift_cost (speed, mode, INTVAL (op1))
2227           > INTVAL (op1) * add_cost (speed, mode))
2228       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2229     {
2230       int i;
2231       for (i = 0; i < INTVAL (op1); i++)
2232         {
2233           temp = force_reg (mode, shifted);
2234           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2235                                   unsignedp, OPTAB_LIB_WIDEN);
2236         }
2237       return shifted;
2238     }
2239   /* Try increasingly general expansion methods until one yields a result.  */
2240   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2241     {
2242       enum optab_methods methods;
2243
2244       if (attempt == 0)
2245         methods = OPTAB_DIRECT;
2246       else if (attempt == 1)
2247         methods = OPTAB_WIDEN;
2248       else
2249         methods = OPTAB_LIB_WIDEN;
2250
2251       if (rotate)
2252         {
2253           /* Widening does not work for rotation.  */
2254           if (methods == OPTAB_WIDEN)
2255             continue;
2256           else if (methods == OPTAB_LIB_WIDEN)
2257             {
2258               /* If we have been unable to open-code this by a rotation,
2259                  do it as the IOR of two shifts.  I.e., to rotate A
2260                  by N bits, compute
2261                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2262                  where C is the bitsize of A.
2263
2264                  It is theoretically possible that the target machine might
2265                  not be able to perform either shift and hence we would
2266                  be making two libcalls rather than just the one for the
2267                  shift (similarly if IOR could not be done).  We will allow
2268                  this extremely unlikely lossage to avoid complicating the
2269                  code below.  */
2270
2271               rtx subtarget = target == shifted ? 0 : target;
2272               rtx new_amount, other_amount;
2273               rtx temp1;
2274
2275               new_amount = op1;
2276               if (op1 == const0_rtx)
2277                 return shifted;
2278               else if (CONST_INT_P (op1))
2279                 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2280                                         - INTVAL (op1));
2281               else
2282                 {
2283                   other_amount
2284                     = simplify_gen_unary (NEG, GET_MODE (op1),
2285                                           op1, GET_MODE (op1));
2286                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2287                   other_amount
2288                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2289                                            gen_int_mode (mask, GET_MODE (op1)));
2290                 }
2291               /* SHIFTED feeds both shifts below; presumably hence the REG.  */
2292               shifted = force_reg (mode, shifted);
2293
2294               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2295                                      mode, shifted, new_amount, 0, 1);
2296               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2297                                       mode, shifted, other_amount,
2298                                       subtarget, 1);
2299               return expand_binop (mode, ior_optab, temp, temp1, target,
2300                                    unsignedp, methods);
2301             }
2302
2303           temp = expand_binop (mode,
2304                                left ? lrotate_optab : rrotate_optab,
2305                                shifted, op1, target, unsignedp, methods);
2306         }
2307       else if (unsignedp)
2308         temp = expand_binop (mode,
2309                              left ? lshift_optab : rshift_uns_optab,
2310                              shifted, op1, target, unsignedp, methods);
2311
2312       /* Do arithmetic shifts.
2313          Also, if we are going to widen the operand, we can just as well
2314          use an arithmetic right-shift instead of a logical one.  */
2315       if (temp == 0 && ! rotate
2316           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2317         {
2318           enum optab_methods methods1 = methods;
2319
2320           /* If trying to widen a log shift to an arithmetic shift,
2321              don't accept an arithmetic shift of the same size.  */
2322           if (unsignedp)
2323             methods1 = OPTAB_MUST_WIDEN;
2324
2325           /* Arithmetic shift */
2326
2327           temp = expand_binop (mode,
2328                                left ? lshift_optab : rshift_arith_optab,
2329                                shifted, op1, target, unsignedp, methods1);
2330         }
2331
2332       /* We used to try extzv here for logical right shifts, but that was
2333          only useful for one machine, the VAX, and caused poor code
2334          generation there for lshrdi3, so the code was deleted and a
2335          define_expand for lshrsi3 was added to vax.md.  */
2336     }
2337   /* One of the attempts above is expected to have produced a result.  */
2338   gcc_assert (temp);
2339   return temp;
2340 }
2341
2342 /* Emit a shift for tree code CODE on the rtx SHIFTED, using machine
2343    mode MODE, where the shift count AMOUNT is a plain integer.
2344    AMOUNT is wrapped with GEN_INT and the work is delegated to
2345    expand_shift_1.  TARGET is a suggested place for the result.
2346    Nonzero UNSIGNEDP selects a logical shift, zero an arithmetic one.
2347    Returns the rtx that holds the shifted value.  */
2348
2349 rtx
2350 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2351               int amount, rtx target, int unsignedp)
2352 {
2353   rtx count = GEN_INT (amount);
2354   return expand_shift_1 (code, mode, shifted, count, target, unsignedp);
2355 }
2356
2357 /* Emit a shift for tree code CODE on the rtx SHIFTED, using machine
2358    mode MODE, where the shift count AMOUNT is given as a tree.
2359    AMOUNT is expanded to an rtx with expand_normal before the work
2360    is delegated to expand_shift_1.  TARGET is a suggested place for
2361    the result; nonzero UNSIGNEDP selects a logical shift, zero an
2362    arithmetic one.  Returns the rtx that holds the shifted value.  */
2363
2364 rtx
2365 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2366                        tree amount, rtx target, int unsignedp)
2367 {
2368   rtx count = expand_normal (amount);
2369   return expand_shift_1 (code, mode, shifted, count, target, unsignedp);
2370 }
2371
2372 \f
2373 /* Kind of fixup expand_mult_const applies after the shift/add sequence
2374    of a constant multiplication: basic_variant means no fixup is needed,
2375    negate_variant means that the result is negated, and add_variant
2376    means that the multiplicand is added to the result.  */
2377 enum mult_variant {basic_variant, negate_variant, add_variant};
2378
2379 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2380                         const struct mult_cost *, machine_mode mode);
2381 static bool choose_mult_variant (machine_mode, HOST_WIDE_INT,
2382                                  struct algorithm *, enum mult_variant *, int);
2383 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2384                               const struct algorithm *, enum mult_variant);
2385 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2386 static rtx extract_high_half (machine_mode, rtx);
2387 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2388 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2389                                        int, int);
2390 /* Compute the best algorithm for multiplying by T and store it in
2391    *ALG_OUT.  The algorithm must cost less than COST_LIMIT.
2392    If ALG_OUT->cost is not less than COST_LIMIT on return, no algorithm
2393    was found and all other fields of *ALG_OUT are undefined.
2394    MODE is the machine mode of the multiplication.  */
2395
2396 static void
2397 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2398             const struct mult_cost *cost_limit, machine_mode mode)
2399 {
2400   int m;
2401   struct algorithm *alg_in, *best_alg;
2402   struct mult_cost best_cost;
2403   struct mult_cost new_limit;
2404   int op_cost, op_latency;
2405   unsigned HOST_WIDE_INT orig_t = t;  /* T before masking to IMODE.  */
2406   unsigned HOST_WIDE_INT q;
2407   int maxm, hash_index;
2408   bool cache_hit = false;
2409   enum alg_code cache_alg = alg_zero;
2410   bool speed = optimize_insn_for_speed_p ();
2411   machine_mode imode;
2412   struct alg_hash_entry *entry_ptr;
2413
2414   /* Indicate that no algorithm is yet found.  If no algorithm
2415      is found, this value will be returned and indicate failure.  */
2416   alg_out->cost.cost = cost_limit->cost + 1;
2417   alg_out->cost.latency = cost_limit->latency + 1;
2418
2419   if (cost_limit->cost < 0
2420       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2421     return;
2422
2423   /* Be prepared for vector modes.  */
2424   imode = GET_MODE_INNER (mode);
2425   if (imode == VOIDmode)
2426     imode = mode;
2427
2428   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2429
2430   /* Restrict the bits of "t" to the multiplication's mode.  */
2431   t &= GET_MODE_MASK (imode);
2432
2433   /* t == 1 can be done in zero cost.  */
2434   if (t == 1)
2435     {
2436       alg_out->ops = 1;
2437       alg_out->cost.cost = 0;
2438       alg_out->cost.latency = 0;
2439       alg_out->op[0] = alg_m;
2440       return;
2441     }
2442
2443   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2444      fail now.  */
2445   if (t == 0)
2446     {
2447       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2448         return;
2449       else
2450         {
2451           alg_out->ops = 1;
2452           alg_out->cost.cost = zero_cost (speed);
2453           alg_out->cost.latency = zero_cost (speed);
2454           alg_out->op[0] = alg_zero;
2455           return;
2456         }
2457     }
2458
2459   /* We'll be needing a couple extra algorithm structures now.  */
2460
2461   alg_in = XALLOCA (struct algorithm);
2462   best_alg = XALLOCA (struct algorithm);
2463   best_cost = *cost_limit;
2464
2465   /* Compute the hash index.  */
2466   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2467
2468   /* See if we already know what to do for T: the cached entry must
2469      describe the same T, MODE and SPEED and hold a known algorithm.  */
2470   entry_ptr = alg_hash_entry_ptr (hash_index);
2471   if (entry_ptr->t == t
2472       && entry_ptr->mode == mode
2473       && entry_ptr->speed == speed
2474       && entry_ptr->alg != alg_unknown)
2475     {
2476       cache_alg = entry_ptr->alg;
2477
2478       if (cache_alg == alg_impossible)
2479         {
2480           /* The cache tells us that it's impossible to synthesize
2481              multiplication by T within entry_ptr->cost.  */
2482           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2483             /* COST_LIMIT is at least as restrictive as the one
2484                recorded in the hash table, in which case we have no
2485                hope of synthesizing a multiplication.  Just
2486                return.  */
2487             return;
2488
2489           /* If we get here, COST_LIMIT is less restrictive than the
2490              one recorded in the hash table, so we may be able to
2491              synthesize a multiplication.  Proceed as if we didn't
2492              have the cache entry.  */
2493         }
2494       else
2495         {
2496           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2497             /* The cached algorithm shows that this multiplication
2498                requires more cost than COST_LIMIT.  Just return.  This
2499                way, we don't clobber this cache entry with
2500                alg_impossible but retain useful information.  */
2501             return;
2502
2503           cache_hit = true;
2504
2505           switch (cache_alg)
2506             {
2507             case alg_shift:
2508               goto do_alg_shift;
2509
2510             case alg_add_t_m2:
2511             case alg_sub_t_m2:
2512               goto do_alg_addsub_t_m2;
2513
2514             case alg_add_factor:
2515             case alg_sub_factor:
2516               goto do_alg_addsub_factor;
2517
2518             case alg_add_t2_m:
2519               goto do_alg_add_t2_m;
2520
2521             case alg_sub_t2_m:
2522               goto do_alg_sub_t2_m;
2523
2524             default:
2525               gcc_unreachable ();
2526             }
2527         }
2528     }
2529
2530   /* If we have a group of zero bits at the low-order part of T, try
2531      multiplying by the remaining bits and then doing a shift.  */
2532
2533   if ((t & 1) == 0)
2534     {
2535     do_alg_shift:
2536       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2537       if (m < maxm)
2538         {
2539           q = t >> m;
2540           /* The function expand_shift will choose between a shift and
2541              a sequence of additions, so the observed cost is given as
2542              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2543           op_cost = m * add_cost (speed, mode);
2544           if (shift_cost (speed, mode, m) < op_cost)
2545             op_cost = shift_cost (speed, mode, m);
2546           new_limit.cost = best_cost.cost - op_cost;
2547           new_limit.latency = best_cost.latency - op_cost;
2548           synth_mult (alg_in, q, &new_limit, mode);
2549
2550           alg_in->cost.cost += op_cost;
2551           alg_in->cost.latency += op_cost;
2552           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2553             {
2554               best_cost = alg_in->cost;
2555               std::swap (alg_in, best_alg);
2556               best_alg->log[best_alg->ops] = m;
2557               best_alg->op[best_alg->ops] = alg_shift;
2558             }
2559
2560           /* See if treating ORIG_T as a signed number yields a better
2561              sequence.  Try this sequence only for a negative ORIG_T
2562              as it would be useless for a non-negative ORIG_T.  */
2563           if ((HOST_WIDE_INT) orig_t < 0)
2564             {
2565               /* Shift ORIG_T as follows because a right shift of a
2566                  negative-valued signed type is implementation
2567                  defined.  */
2568               q = ~(~orig_t >> m);
2569               /* The function expand_shift will choose between a shift
2570                  and a sequence of additions, so the observed cost is
2571                  given as MIN (m * add_cost(speed, mode),
2572                  shift_cost(speed, mode, m)).  */
2573               op_cost = m * add_cost (speed, mode);
2574               if (shift_cost (speed, mode, m) < op_cost)
2575                 op_cost = shift_cost (speed, mode, m);
2576               new_limit.cost = best_cost.cost - op_cost;
2577               new_limit.latency = best_cost.latency - op_cost;
2578               synth_mult (alg_in, q, &new_limit, mode);
2579
2580               alg_in->cost.cost += op_cost;
2581               alg_in->cost.latency += op_cost;
2582               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2583                 {
2584                   best_cost = alg_in->cost;
2585                   std::swap (alg_in, best_alg);
2586                   best_alg->log[best_alg->ops] = m;
2587                   best_alg->op[best_alg->ops] = alg_shift;
2588                 }
2589             }
2590         }
2591       if (cache_hit)
2592         goto done;
2593     }
2594
2595   /* If we have an odd number, add or subtract one.  */
2596   if ((t & 1) != 0)
2597     {
2598       unsigned HOST_WIDE_INT w;
2599
2600     do_alg_addsub_t_m2:
2601       for (w = 1; (w & t) != 0; w <<= 1)
2602         ;
2603       /* If T was -1, then W will be zero after the loop.  This is another
2604          case where T ends with ...111.  Handling this with (T + 1) and
2605          subtract 1 produces slightly better code and results in algorithm
2606          selection much faster than treating it like the ...0111 case
2607          below.  */
2608       if (w == 0
2609           || (w > 2
2610               /* Reject the case where t is 3.
2611                  Thus we prefer addition in that case.  */
2612               && t != 3))
2613         {
2614           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2615
2616           op_cost = add_cost (speed, mode);
2617           new_limit.cost = best_cost.cost - op_cost;
2618           new_limit.latency = best_cost.latency - op_cost;
2619           synth_mult (alg_in, t + 1, &new_limit, mode);
2620
2621           alg_in->cost.cost += op_cost;
2622           alg_in->cost.latency += op_cost;
2623           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2624             {
2625               best_cost = alg_in->cost;
2626               std::swap (alg_in, best_alg);
2627               best_alg->log[best_alg->ops] = 0;
2628               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2629             }
2630         }
2631       else
2632         {
2633           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2634
2635           op_cost = add_cost (speed, mode);
2636           new_limit.cost = best_cost.cost - op_cost;
2637           new_limit.latency = best_cost.latency - op_cost;
2638           synth_mult (alg_in, t - 1, &new_limit, mode);
2639
2640           alg_in->cost.cost += op_cost;
2641           alg_in->cost.latency += op_cost;
2642           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2643             {
2644               best_cost = alg_in->cost;
2645               std::swap (alg_in, best_alg);
2646               best_alg->log[best_alg->ops] = 0;
2647               best_alg->op[best_alg->ops] = alg_add_t_m2;
2648             }
2649         }
2650
2651       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2652          quickly with a - a * n for some appropriate constant n.  */
2653       m = exact_log2 (-orig_t + 1);
2654       if (m >= 0 && m < maxm)
2655         {
2656           op_cost = shiftsub1_cost (speed, mode, m);
2657           new_limit.cost = best_cost.cost - op_cost;
2658           new_limit.latency = best_cost.latency - op_cost;
2659           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2660                       &new_limit, mode);
2661
2662           alg_in->cost.cost += op_cost;
2663           alg_in->cost.latency += op_cost;
2664           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2665             {
2666               best_cost = alg_in->cost;
2667               std::swap (alg_in, best_alg);
2668               best_alg->log[best_alg->ops] = m;
2669               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2670             }
2671         }
2672
2673       if (cache_hit)
2674         goto done;
2675     }
2676
2677   /* Look for factors of t of the form
2678      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2679      If we find such a factor, we can multiply by t using an algorithm that
2680      multiplies by q, shift the result by m and add/subtract it to itself.
2681
2682      We search for large factors first and loop down, even if large factors
2683      are less probable than small; if we find a large factor we will find a
2684      good sequence quickly, and therefore be able to prune (by decreasing
2685      COST_LIMIT) the search.  */
2686
2687  do_alg_addsub_factor:
2688   for (m = floor_log2 (t - 1); m >= 2; m--)
2689     {
2690       unsigned HOST_WIDE_INT d;
2691
2692       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2693       if (t % d == 0 && t > d && m < maxm
2694           && (!cache_hit || cache_alg == alg_add_factor))
2695         {
2696           /* If the target has a cheap shift-and-add instruction use
2697              that in preference to a shift insn followed by an add insn.
2698              Assume that the shift-and-add is "atomic" with a latency
2699              equal to its cost, otherwise assume that on superscalar
2700              hardware the shift may be executed concurrently with the
2701              earlier steps in the algorithm.  */
2702           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2703           if (shiftadd_cost (speed, mode, m) < op_cost)
2704             {
2705               op_cost = shiftadd_cost (speed, mode, m);
2706               op_latency = op_cost;
2707             }
2708           else
2709             op_latency = add_cost (speed, mode);
2710
2711           new_limit.cost = best_cost.cost - op_cost;
2712           new_limit.latency = best_cost.latency - op_latency;
2713           synth_mult (alg_in, t / d, &new_limit, mode);
2714
2715           alg_in->cost.cost += op_cost;
2716           alg_in->cost.latency += op_latency;
2717           if (alg_in->cost.latency < op_cost)
2718             alg_in->cost.latency = op_cost;
2719           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2720             {
2721               best_cost = alg_in->cost;
2722               std::swap (alg_in, best_alg);
2723               best_alg->log[best_alg->ops] = m;
2724               best_alg->op[best_alg->ops] = alg_add_factor;
2725             }
2726           /* Other factors will have been taken care of in the recursion.  */
2727           break;
2728         }
2729
2730       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2731       if (t % d == 0 && t > d && m < maxm
2732           && (!cache_hit || cache_alg == alg_sub_factor))
2733         {
2734           /* If the target has a cheap shift-and-subtract insn use
2735              that in preference to a shift insn followed by a sub insn.
2736              Assume that the shift-and-sub is "atomic" with a latency
2737              equal to its cost, otherwise assume that on superscalar
2738              hardware the shift may be executed concurrently with the
2739              earlier steps in the algorithm.  */
2740           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2741           if (shiftsub0_cost (speed, mode, m) < op_cost)
2742             {
2743               op_cost = shiftsub0_cost (speed, mode, m);
2744               op_latency = op_cost;
2745             }
2746           else
2747             op_latency = add_cost (speed, mode);
2748
2749           new_limit.cost = best_cost.cost - op_cost;
2750           new_limit.latency = best_cost.latency - op_latency;
2751           synth_mult (alg_in, t / d, &new_limit, mode);
2752
2753           alg_in->cost.cost += op_cost;
2754           alg_in->cost.latency += op_latency;
2755           if (alg_in->cost.latency < op_cost)
2756             alg_in->cost.latency = op_cost;
2757           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2758             {
2759               best_cost = alg_in->cost;
2760               std::swap (alg_in, best_alg);
2761               best_alg->log[best_alg->ops] = m;
2762               best_alg->op[best_alg->ops] = alg_sub_factor;
2763             }
2764           break;
2765         }
2766     }
2767   if (cache_hit)
2768     goto done;
2769
2770   /* Try shift-and-add (load effective address) instructions,
2771      i.e. do a*3, a*5, a*9.  */
2772   if ((t & 1) != 0)
2773     {
2774     do_alg_add_t2_m:
2775       q = t - 1;
2776       q = q & -q;
2777       m = exact_log2 (q);
2778       if (m >= 0 && m < maxm)
2779         {
2780           op_cost = shiftadd_cost (speed, mode, m);
2781           new_limit.cost = best_cost.cost - op_cost;
2782           new_limit.latency = best_cost.latency - op_cost;
2783           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2784
2785           alg_in->cost.cost += op_cost;
2786           alg_in->cost.latency += op_cost;
2787           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2788             {
2789               best_cost = alg_in->cost;
2790               std::swap (alg_in, best_alg);
2791               best_alg->log[best_alg->ops] = m;
2792               best_alg->op[best_alg->ops] = alg_add_t2_m;
2793             }
2794         }
2795       if (cache_hit)
2796         goto done;
2797
2798     do_alg_sub_t2_m:
2799       q = t + 1;
2800       q = q & -q;
2801       m = exact_log2 (q);
2802       if (m >= 0 && m < maxm)
2803         {
2804           op_cost = shiftsub0_cost (speed, mode, m);
2805           new_limit.cost = best_cost.cost - op_cost;
2806           new_limit.latency = best_cost.latency - op_cost;
2807           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2808
2809           alg_in->cost.cost += op_cost;
2810           alg_in->cost.latency += op_cost;
2811           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2812             {
2813               best_cost = alg_in->cost;
2814               std::swap (alg_in, best_alg);
2815               best_alg->log[best_alg->ops] = m;
2816               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2817             }
2818         }
2819       if (cache_hit)
2820         goto done;
2821     }
2822
2823  done:
2824   /* If best_cost has not decreased, we have not found any algorithm.  */
2825   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2826     {
2827       /* We failed to find an algorithm.  Record alg_impossible for
2828          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2829          we are asked to find an algorithm for T within the same or
2830          lower COST_LIMIT, we can immediately return to the
2831          caller.  */
2832       entry_ptr->t = t;
2833       entry_ptr->mode = mode;
2834       entry_ptr->speed = speed;
2835       entry_ptr->alg = alg_impossible;
2836       entry_ptr->cost = *cost_limit;
2837       return;
2838     }
2839
2840   /* Cache the result.  */
2841   if (!cache_hit)
2842     {
2843       entry_ptr->t = t;
2844       entry_ptr->mode = mode;
2845       entry_ptr->speed = speed;
2846       entry_ptr->alg = best_alg->op[best_alg->ops];
2847       entry_ptr->cost.cost = best_cost.cost;
2848       entry_ptr->cost.latency = best_cost.latency;
2849     }
2850
2851   /* If we are getting a too long sequence for `struct algorithm'
2852      to record, make this search fail.  */
2853   if (best_alg->ops == MAX_BITS_PER_WORD)
2854     return;
2855
2856   /* Copy the algorithm from temporary space to the space at alg_out.
2857      We avoid using structure assignment because the majority of
2858      best_alg is normally undefined, and this is a critical function.  */
2859   alg_out->ops = best_alg->ops + 1;
2860   alg_out->cost = best_cost;
2861   memcpy (alg_out->op, best_alg->op,
2862           alg_out->ops * sizeof *alg_out->op);
2863   memcpy (alg_out->log, best_alg->log,
2864           alg_out->ops * sizeof *alg_out->log);
2865 }
2866 \f
2867 /* Pick the cheapest strategy for multiplying a MODE value by VAL.
2868    Three candidates are costed with synth_mult:
2869
2870        - a shift/add sequence for VAL itself (basic_variant)
2871        - a shift/add sequence for -VAL, then a negation (negate_variant)
2872        - a shift/add sequence for VAL - 1, then an addition (add_variant).
2873
2874    The winner is left in *ALG and its fixup kind in *VARIANT.  The
2875    result is true only if the winner costs less than MULT_COST.  */
2876
2877 static bool
2878 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2879                      struct algorithm *alg, enum mult_variant *variant,
2880                      int mult_cost)
2881 {
2882   struct algorithm trial;
2883   struct mult_cost budget;
2884   int extra;
2885   bool speed = optimize_insn_for_speed_p ();
2886   bool improved;
2887
2888   /* A negative bound can never be met.  */
2889   if (mult_cost < 0)
2890     return false;
2891
2892   /* Clamp MULT_COST to a sensible ceiling.
2893      Any constant multiplication can be performed with less
2894      than 2 * bits additions.  */
2895   extra = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2896   if (extra < mult_cost)
2897     mult_cost = extra;
2898
2899   /* First candidate: VAL itself, no fixup.  */
2900   *variant = basic_variant;
2901   budget.cost = mult_cost;
2902   budget.latency = mult_cost;
2903   synth_mult (alg, val, &budget, mode);
2904
2905   /* Second candidate: -VAL followed by a negation.  This works only
2906      if the inverted value actually fits in an `unsigned int'.  */
2907   if (GET_MODE_UNIT_BITSIZE (mode) <= HOST_BITS_PER_INT)
2908     {
2909       extra = neg_cost (speed, mode);
2910
2911       /* Leave room for the negation below whichever bound is in
2912          force: the best cost so far if it beats MULT_COST, else
2913          MULT_COST itself.  */
2914       improved = MULT_COST_LESS (&alg->cost, mult_cost);
2915       budget.cost = (improved ? alg->cost.cost : mult_cost) - extra;
2916       budget.latency = (improved ? alg->cost.latency : mult_cost) - extra;
2917
2918       synth_mult (&trial, -val, &budget, mode);
2919       trial.cost.cost += extra;
2920       trial.cost.latency += extra;
2921       if (CHEAPER_MULT_COST (&trial.cost, &alg->cost))
2922         {
2923           *alg = trial;
2924           *variant = negate_variant;
2925         }
2926     }
2927
2928   /* Third candidate: VAL - 1 followed by an addition.  This proves
2929      very useful for division-by-constant.  */
2930   extra = add_cost (speed, mode);
2931   improved = MULT_COST_LESS (&alg->cost, mult_cost);
2932   budget.cost = (improved ? alg->cost.cost : mult_cost) - extra;
2933   budget.latency = (improved ? alg->cost.latency : mult_cost) - extra;
2934
2935   synth_mult (&trial, val - 1, &budget, mode);
2936   trial.cost.cost += extra;
2937   trial.cost.latency += extra;
2938   if (CHEAPER_MULT_COST (&trial.cost, &alg->cost))
2939     {
2940       *alg = trial;
2941       *variant = add_variant;
2942     }
2943
2944   /* Report whether the overall winner beats the (clamped) bound.  */
2945   return MULT_COST_LESS (&alg->cost, mult_cost);
2946 }
2947
2948 /* A subroutine of expand_mult, used for constant multiplications.
2949    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2950    convenient.  Use the shift/add sequence described by ALG and apply
2951    the final fixup specified by VARIANT.  Return the rtx of the product.  */
2952
2953 static rtx
2954 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
2955                    rtx target, const struct algorithm *alg,
2956                    enum mult_variant variant)
2957 {
2958   unsigned HOST_WIDE_INT val_so_far;  /* Unsigned, so updates wrap.  */
2959   rtx_insn *insn;
2960   rtx accum, tem;
2961   int opno;
2962   machine_mode nmode;
2963
2964   /* Avoid referencing memory over and over and invalid sharing
2965      on SUBREGs.  */
2966   op0 = force_reg (mode, op0);
2967
2968   /* ACCUM starts out either as OP0 or as a zero, depending on
2969      the first operation.  VAL_SO_FAR tracks the constant by which OP0
2970      has been multiplied so far, for the notes and the final check.  */
2971   if (alg->op[0] == alg_zero)
2972     {
2973       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2974       val_so_far = 0;
2975     }
2976   else if (alg->op[0] == alg_m)
2977     {
2978       accum = copy_to_mode_reg (mode, op0);
2979       val_so_far = 1;
2980     }
2981   else
2982     gcc_unreachable ();
2983
2984   for (opno = 1; opno < alg->ops; opno++)
2985     {
2986       int log = alg->log[opno];
2987       rtx shift_subtarget = optimize ? 0 : accum;
2988       rtx add_target
2989         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2990            && !optimize)
2991           ? target : 0;
2992       rtx accum_target = optimize ? 0 : accum;
2993       rtx accum_inner;
2994
2995       switch (alg->op[opno])
2996         {
2997         case alg_shift:
2998           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2999           /* REG_EQUAL note will be attached to the following insn.  */
3000           emit_move_insn (accum, tem);
3001           val_so_far <<= log;
3002           break;
3003
3004         case alg_add_t_m2:
3005           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3006           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3007                                  add_target ? add_target : accum_target);
3008           val_so_far += (unsigned HOST_WIDE_INT) 1 << log;
3009           break;
3010
3011         case alg_sub_t_m2:
3012           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3013           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3014                                  add_target ? add_target : accum_target);
3015           val_so_far -= (unsigned HOST_WIDE_INT) 1 << log;
3016           break;
3017
3018         case alg_add_t2_m:
3019           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3020                                 log, shift_subtarget, 0);
3021           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3022                                  add_target ? add_target : accum_target);
3023           val_so_far = (val_so_far << log) + 1;
3024           break;
3025
3026         case alg_sub_t2_m:
3027           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3028                                 log, shift_subtarget, 0);
3029           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3030                                  add_target ? add_target : accum_target);
3031           val_so_far = (val_so_far << log) - 1;
3032           break;
3033
3034         case alg_add_factor:
3035           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3036           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3037                                  add_target ? add_target : accum_target);
3038           val_so_far += val_so_far << log;
3039           break;
3040
3041         case alg_sub_factor:
3042           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3043           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3044                                  (add_target
3045                                   ? add_target : (optimize ? 0 : tem)));
3046           val_so_far = (val_so_far << log) - val_so_far;
3047           break;
3048
3049         default:
3050           gcc_unreachable ();
3051         }
3052
3053       if (SCALAR_INT_MODE_P (mode))
3054         {
3055           /* Write a REG_EQUAL note on the last insn so that we can cse
3056              multiplication sequences.  Note that if ACCUM is a SUBREG,
3057              we've set the inner register and must properly indicate that.  */
3058           tem = op0, nmode = mode;
3059           accum_inner = accum;
3060           if (GET_CODE (accum) == SUBREG)
3061             {
3062               accum_inner = SUBREG_REG (accum);
3063               nmode = GET_MODE (accum_inner);
3064               tem = gen_lowpart (nmode, op0);
3065             }
3066
3067           insn = get_last_insn ();
3068           set_dst_reg_note (insn, REG_EQUAL,
3069                             gen_rtx_MULT (nmode, tem,
3070                                           gen_int_mode (val_so_far, nmode)),
3071                             accum_inner);
3072         }
3073     }
3074
3075   if (variant == negate_variant)
3076     {
3077       val_so_far = -val_so_far;
3078       accum = expand_unop (mode, neg_optab, accum, target, 0);
3079     }
3080   else if (variant == add_variant)
3081     {
3082       val_so_far = val_so_far + 1;
3083       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3084     }
3085
3086   /* Compare only the bits of val and val_so_far that are significant
3087      in the result mode, to avoid sign-/zero-extension confusion.  */
3088   nmode = GET_MODE_INNER (mode);
3089   if (nmode == VOIDmode)
3090     nmode = mode;
3091   val &= GET_MODE_MASK (nmode);
3092   val_so_far &= GET_MODE_MASK (nmode);
3093   gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3094
3095   return accum;
3096 }
3097
3098 /* Perform a multiplication and return an rtx for the result.
3099    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3100    TARGET is a suggestion for where to store the result (an rtx).
3101
3102    We check specially for a constant integer as OP1.
3103    If you want this check for OP0 as well, then before calling
3104    you should swap the two operands if OP0 would be constant.  */
3105
3106 rtx
3107 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3108              int unsignedp)
3109 {
3110   enum mult_variant variant;
3111   struct algorithm algorithm;
3112   rtx scalar_op1;
3113   int max_cost;
3114   bool speed = optimize_insn_for_speed_p ();
3115   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3116
3117   if (CONSTANT_P (op0))
3118     std::swap (op0, op1);
3119
3120   /* For vectors, there are several simplifications that can be made if
3121      all elements of the vector constant are identical.  */
3122   scalar_op1 = op1;
3123   if (GET_CODE (op1) == CONST_VECTOR)
3124     {
3125       int i, n = CONST_VECTOR_NUNITS (op1);
3126       scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3127       for (i = 1; i < n; ++i)
3128         if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3129           goto skip_scalar;
3130     }
3131
3132   if (INTEGRAL_MODE_P (mode))
3133     {
3134       rtx fake_reg;
3135       HOST_WIDE_INT coeff;
3136       bool is_neg;
3137       int mode_bitsize;
3138
3139       if (op1 == CONST0_RTX (mode))
3140         return op1;
3141       if (op1 == CONST1_RTX (mode))
3142         return op0;
3143       if (op1 == CONSTM1_RTX (mode))
3144         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3145                             op0, target, 0);
3146
3147       if (do_trapv)
3148         goto skip_synth;
3149
3150       /* If mode is integer vector mode, check if the backend supports
3151          vector lshift (by scalar or vector) at all.  If not, we can't use
3152          synthetized multiply.  */
3153       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3154           && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3155           && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3156         goto skip_synth;
3157
3158       /* These are the operations that are potentially turned into
3159          a sequence of shifts and additions.  */
3160       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3161
3162       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3163          less than or equal in size to `unsigned int' this doesn't matter.
3164          If the mode is larger than `unsigned int', then synth_mult works
3165          only if the constant value exactly fits in an `unsigned int' without
3166          any truncation.  This means that multiplying by negative values does
3167          not work; results are off by 2^32 on a 32 bit machine.  */
3168       if (CONST_INT_P (scalar_op1))
3169         {
3170           coeff = INTVAL (scalar_op1);
3171           is_neg = coeff < 0;
3172         }
3173 #if TARGET_SUPPORTS_WIDE_INT
3174       else if (CONST_WIDE_INT_P (scalar_op1))
3175 #else
3176       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3177 #endif
3178         {
3179           int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3180           /* Perfect power of 2 (other than 1, which is handled above).  */
3181           if (shift > 0)
3182             return expand_shift (LSHIFT_EXPR, mode, op0,
3183                                  shift, target, unsignedp);
3184           else
3185             goto skip_synth;
3186         }
3187       else
3188         goto skip_synth;
3189
3190       /* We used to test optimize here, on the grounds that it's better to
3191          produce a smaller program when -O is not used.  But this causes
3192          such a terrible slowdown sometimes that it seems better to always
3193          use synth_mult.  */
3194
3195       /* Special case powers of two.  */
3196       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3197           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3198         return expand_shift (LSHIFT_EXPR, mode, op0,
3199                              floor_log2 (coeff), target, unsignedp);
3200
3201       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3202
3203       /* Attempt to handle multiplication of DImode values by negative
3204          coefficients, by performing the multiplication by a positive
3205          multiplier and then inverting the result.  */
3206       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3207         {
3208           /* Its safe to use -coeff even for INT_MIN, as the
3209              result is interpreted as an unsigned coefficient.
3210              Exclude cost of op0 from max_cost to match the cost
3211              calculation of the synth_mult.  */
3212           coeff = -(unsigned HOST_WIDE_INT) coeff;
3213           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3214                       - neg_cost (speed, mode));
3215           if (max_cost <= 0)
3216             goto skip_synth;
3217
3218           /* Special case powers of two.  */
3219           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3220             {
3221               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3222                                        floor_log2 (coeff), target, unsignedp);
3223               return expand_unop (mode, neg_optab, temp, target, 0);
3224             }
3225
3226           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3227                                    max_cost))
3228             {
3229               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3230                                             &algorithm, variant);
3231               return expand_unop (mode, neg_optab, temp, target, 0);
3232             }
3233           goto skip_synth;
3234         }
3235
3236       /* Exclude cost of op0 from max_cost to match the cost
3237          calculation of the synth_mult.  */
3238       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3239       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3240         return expand_mult_const (mode, op0, coeff, target,
3241                                   &algorithm, variant);
3242     }
3243  skip_synth:
3244
3245   /* Expand x*2.0 as x+x.  */
3246   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3247     {
3248       REAL_VALUE_TYPE d;
3249       REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3250
3251       if (REAL_VALUES_EQUAL (d, dconst2))
3252         {
3253           op0 = force_reg (GET_MODE (op0), op0);
3254           return expand_binop (mode, add_optab, op0, op0,
3255                                target, unsignedp, OPTAB_LIB_WIDEN);
3256         }
3257     }
3258  skip_scalar:
3259
3260   /* This used to use umul_optab if unsigned, but for non-widening multiply
3261      there is no difference between signed and unsigned.  */
3262   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3263                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3264   gcc_assert (op0);
3265   return op0;
3266 }
3267
3268 /* Return a cost estimate for multiplying a register by the given
3269    COEFFicient in the given MODE and SPEED.  */
3270
3271 int
3272 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3273 {
3274   int max_cost;
3275   struct algorithm algorithm;
3276   enum mult_variant variant;
3277
3278   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3279   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3280   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3281     return algorithm.cost.cost;
3282   else
3283     return max_cost;
3284 }
3285
3286 /* Perform a widening multiplication and return an rtx for the result.
3287    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3288    TARGET is a suggestion for where to store the result (an rtx).
3289    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3290    or smul_widen_optab.
3291
3292    We check specially for a constant integer as OP1, comparing the
3293    cost of a widening multiply against the cost of a sequence of shifts
3294    and adds.  */
3295
3296 rtx
3297 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3298                       int unsignedp, optab this_optab)
3299 {
3300   bool speed = optimize_insn_for_speed_p ();
3301   rtx cop1;
3302
3303   if (CONST_INT_P (op1)
3304       && GET_MODE (op0) != VOIDmode
3305       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3306                                 this_optab == umul_widen_optab))
3307       && CONST_INT_P (cop1)
3308       && (INTVAL (cop1) >= 0
3309           || HWI_COMPUTABLE_MODE_P (mode)))
3310     {
3311       HOST_WIDE_INT coeff = INTVAL (cop1);
3312       int max_cost;
3313       enum mult_variant variant;
3314       struct algorithm algorithm;
3315
3316       if (coeff == 0)
3317         return CONST0_RTX (mode);
3318
3319       /* Special case powers of two.  */
3320       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3321         {
3322           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3323           return expand_shift (LSHIFT_EXPR, mode, op0,
3324                                floor_log2 (coeff), target, unsignedp);
3325         }
3326
3327       /* Exclude cost of op0 from max_cost to match the cost
3328          calculation of the synth_mult.  */
3329       max_cost = mul_widen_cost (speed, mode);
3330       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3331                                max_cost))
3332         {
3333           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3334           return expand_mult_const (mode, op0, coeff, target,
3335                                     &algorithm, variant);
3336         }
3337     }
3338   return expand_binop (mode, this_optab, op0, op1, target,
3339                        unsignedp, OPTAB_LIB_WIDEN);
3340 }
3341 \f
3342 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3343    replace division by D, and put the least significant N bits of the result
3344    in *MULTIPLIER_PTR and return the most significant bit.
3345
3346    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3347    needed precision is in PRECISION (should be <= N).
3348
3349    PRECISION should be as small as possible so this function can choose
3350    multiplier more freely.
3351
3352    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3353    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3354
3355    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3356    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3357
3358 unsigned HOST_WIDE_INT
3359 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3360                    unsigned HOST_WIDE_INT *multiplier_ptr,
3361                    int *post_shift_ptr, int *lgup_ptr)
3362 {
3363   int lgup, post_shift;
3364   int pow, pow2;
3365
3366   /* lgup = ceil(log2(divisor)); */
3367   lgup = ceil_log2 (d);
3368
3369   gcc_assert (lgup <= n);
3370
3371   pow = n + lgup;
3372   pow2 = n + lgup - precision;
3373
3374   /* mlow = 2^(N + lgup)/d */
3375   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3376   wide_int mlow = wi::udiv_trunc (val, d);
3377
3378   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3379   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3380   wide_int mhigh = wi::udiv_trunc (val, d);
3381
3382   /* If precision == N, then mlow, mhigh exceed 2^N
3383      (but they do not exceed 2^(N+1)).  */
3384
3385   /* Reduce to lowest terms.  */
3386   for (post_shift = lgup; post_shift > 0; post_shift--)
3387     {
3388       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3389                                                        HOST_BITS_PER_WIDE_INT);
3390       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3391                                                        HOST_BITS_PER_WIDE_INT);
3392       if (ml_lo >= mh_lo)
3393         break;
3394
3395       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3396       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3397     }
3398
3399   *post_shift_ptr = post_shift;
3400   *lgup_ptr = lgup;
3401   if (n < HOST_BITS_PER_WIDE_INT)
3402     {
3403       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3404       *multiplier_ptr = mhigh.to_uhwi () & mask;
3405       return mhigh.to_uhwi () >= mask;
3406     }
3407   else
3408     {
3409       *multiplier_ptr = mhigh.to_uhwi ();
3410       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3411     }
3412 }
3413
3414 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3415    congruent to 1 (mod 2**N).  */
3416
3417 static unsigned HOST_WIDE_INT
3418 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3419 {
3420   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3421
3422   /* The algorithm notes that the choice y = x satisfies
3423      x*y == 1 mod 2^3, since x is assumed odd.
3424      Each iteration doubles the number of bits of significance in y.  */
3425
3426   unsigned HOST_WIDE_INT mask;
3427   unsigned HOST_WIDE_INT y = x;
3428   int nbit = 3;
3429
3430   mask = (n == HOST_BITS_PER_WIDE_INT
3431           ? ~(unsigned HOST_WIDE_INT) 0
3432           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3433
3434   while (nbit < n)
3435     {
3436       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3437       nbit *= 2;
3438     }
3439   return y;
3440 }
3441
3442 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3443    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3444    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3445    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3446    become signed.
3447
3448    The result is put in TARGET if that is convenient.
3449
3450    MODE is the mode of operation.  */
3451
3452 rtx
3453 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3454                              rtx op1, rtx target, int unsignedp)
3455 {
3456   rtx tem;
3457   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3458
3459   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3460                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3461   tem = expand_and (mode, tem, op1, NULL_RTX);
3462   adj_operand
3463     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3464                      adj_operand);
3465
3466   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3467                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3468   tem = expand_and (mode, tem, op0, NULL_RTX);
3469   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3470                           target);
3471
3472   return target;
3473 }
3474
3475 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3476
3477 static rtx
3478 extract_high_half (machine_mode mode, rtx op)
3479 {
3480   machine_mode wider_mode;
3481
3482   if (mode == word_mode)
3483     return gen_highpart (mode, op);
3484
3485   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3486
3487   wider_mode = GET_MODE_WIDER_MODE (mode);
3488   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3489                      GET_MODE_BITSIZE (mode), 0, 1);
3490   return convert_modes (mode, wider_mode, op, 0);
3491 }
3492
3493 /* Like expmed_mult_highpart, but only consider using a multiplication
3494    optab.  OP1 is an rtx for the constant operand.  */
3495
3496 static rtx
3497 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3498                             rtx target, int unsignedp, int max_cost)
3499 {
3500   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3501   machine_mode wider_mode;
3502   optab moptab;
3503   rtx tem;
3504   int size;
3505   bool speed = optimize_insn_for_speed_p ();
3506
3507   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3508
3509   wider_mode = GET_MODE_WIDER_MODE (mode);
3510   size = GET_MODE_BITSIZE (mode);
3511
3512   /* Firstly, try using a multiplication insn that only generates the needed
3513      high part of the product, and in the sign flavor of unsignedp.  */
3514   if (mul_highpart_cost (speed, mode) < max_cost)
3515     {
3516       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3517       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3518                           unsignedp, OPTAB_DIRECT);
3519       if (tem)
3520         return tem;
3521     }
3522
3523   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3524      Need to adjust the result after the multiplication.  */
3525   if (size - 1 < BITS_PER_WORD
3526       && (mul_highpart_cost (speed, mode)
3527           + 2 * shift_cost (speed, mode, size-1)
3528           + 4 * add_cost (speed, mode) < max_cost))
3529     {
3530       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3531       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3532                           unsignedp, OPTAB_DIRECT);
3533       if (tem)
3534         /* We used the wrong signedness.  Adjust the result.  */
3535         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3536                                             tem, unsignedp);
3537     }
3538
3539   /* Try widening multiplication.  */
3540   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3541   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3542       && mul_widen_cost (speed, wider_mode) < max_cost)
3543     {
3544       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3545                           unsignedp, OPTAB_WIDEN);
3546       if (tem)
3547         return extract_high_half (mode, tem);
3548     }
3549
3550   /* Try widening the mode and perform a non-widening multiplication.  */
3551   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3552       && size - 1 < BITS_PER_WORD
3553       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3554           < max_cost))
3555     {
3556       rtx_insn *insns;
3557       rtx wop0, wop1;
3558
3559       /* We need to widen the operands, for example to ensure the
3560          constant multiplier is correctly sign or zero extended.
3561          Use a sequence to clean-up any instructions emitted by
3562          the conversions if things don't work out.  */
3563       start_sequence ();
3564       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3565       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3566       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3567                           unsignedp, OPTAB_WIDEN);
3568       insns = get_insns ();
3569       end_sequence ();
3570
3571       if (tem)
3572         {
3573           emit_insn (insns);
3574           return extract_high_half (mode, tem);
3575         }
3576     }
3577
3578   /* Try widening multiplication of opposite signedness, and adjust.  */
3579   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3580   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3581       && size - 1 < BITS_PER_WORD
3582       && (mul_widen_cost (speed, wider_mode)
3583           + 2 * shift_cost (speed, mode, size-1)
3584           + 4 * add_cost (speed, mode) < max_cost))
3585     {
3586       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3587                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3588       if (tem != 0)
3589         {
3590           tem = extract_high_half (mode, tem);
3591           /* We used the wrong signedness.  Adjust the result.  */
3592           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3593                                               target, unsignedp);
3594         }
3595     }
3596
3597   return 0;
3598 }
3599
3600 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3601    putting the high half of the result in TARGET if that is convenient,
3602    and return where the result is.  If the operation can not be performed,
3603    0 is returned.
3604
3605    MODE is the mode of operation and result.
3606
3607    UNSIGNEDP nonzero means unsigned multiply.
3608
3609    MAX_COST is the total allowed cost for the expanded RTL.  */
3610
3611 static rtx
3612 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3613                       rtx target, int unsignedp, int max_cost)
3614 {
3615   machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3616   unsigned HOST_WIDE_INT cnst1;
3617   int extra_cost;
3618   bool sign_adjust = false;
3619   enum mult_variant variant;
3620   struct algorithm alg;
3621   rtx tem;
3622   bool speed = optimize_insn_for_speed_p ();
3623
3624   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3625   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3626   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3627
3628   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3629
3630   /* We can't optimize modes wider than BITS_PER_WORD.
3631      ??? We might be able to perform double-word arithmetic if
3632      mode == word_mode, however all the cost calculations in
3633      synth_mult etc. assume single-word operations.  */
3634   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3635     return expmed_mult_highpart_optab (mode, op0, op1, target,
3636                                        unsignedp, max_cost);
3637
3638   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3639
3640   /* Check whether we try to multiply by a negative constant.  */
3641   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3642     {
3643       sign_adjust = true;
3644       extra_cost += add_cost (speed, mode);
3645     }
3646
3647   /* See whether shift/add multiplication is cheap enough.  */
3648   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3649                            max_cost - extra_cost))
3650     {
3651       /* See whether the specialized multiplication optabs are
3652          cheaper than the shift/add version.  */
3653       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3654                                         alg.cost.cost + extra_cost);
3655       if (tem)
3656         return tem;
3657
3658       tem = convert_to_mode (wider_mode, op0, unsignedp);
3659       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3660       tem = extract_high_half (mode, tem);
3661
3662       /* Adjust result for signedness.  */
3663       if (sign_adjust)
3664         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3665
3666       return tem;
3667     }
3668   return expmed_mult_highpart_optab (mode, op0, op1, target,
3669                                      unsignedp, max_cost);
3670 }
3671
3672
3673 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3674
3675 static rtx
3676 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3677 {
3678   rtx result, temp, shift;
3679   rtx_code_label *label;
3680   int logd;
3681   int prec = GET_MODE_PRECISION (mode);
3682
3683   logd = floor_log2 (d);
3684   result = gen_reg_rtx (mode);
3685
3686   /* Avoid conditional branches when they're expensive.  */
3687   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3688       && optimize_insn_for_speed_p ())
3689     {
3690       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3691                                       mode, 0, -1);
3692       if (signmask)
3693         {
3694           HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3695           signmask = force_reg (mode, signmask);
3696           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3697
3698           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3699              which instruction sequence to use.  If logical right shifts
3700              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3701              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3702
3703           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3704           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3705               || (set_src_cost (temp, optimize_insn_for_speed_p ())
3706                   > COSTS_N_INSNS (2)))
3707             {
3708               temp = expand_binop (mode, xor_optab, op0, signmask,
3709                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3710               temp = expand_binop (mode, sub_optab, temp, signmask,
3711                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3712               temp = expand_binop (mode, and_optab, temp,
3713                                    gen_int_mode (masklow, mode),
3714                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3715               temp = expand_binop (mode, xor_optab, temp, signmask,
3716                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3717               temp = expand_binop (mode, sub_optab, temp, signmask,
3718                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3719             }
3720           else
3721             {
3722               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3723                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3724               signmask = force_reg (mode, signmask);
3725
3726               temp = expand_binop (mode, add_optab, op0, signmask,
3727                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3728               temp = expand_binop (mode, and_optab, temp,
3729                                    gen_int_mode (masklow, mode),
3730                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3731               temp = expand_binop (mode, sub_optab, temp, signmask,
3732                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3733             }
3734           return temp;
3735         }
3736     }
3737
3738   /* Mask contains the mode's signbit and the significant bits of the
3739      modulus.  By including the signbit in the operation, many targets
3740      can avoid an explicit compare operation in the following comparison
3741      against zero.  */
3742   wide_int mask = wi::mask (logd, false, prec);
3743   mask = wi::set_bit (mask, prec - 1);
3744
3745   temp = expand_binop (mode, and_optab, op0,
3746                        immed_wide_int_const (mask, mode),
3747                        result, 1, OPTAB_LIB_WIDEN);
3748   if (temp != result)
3749     emit_move_insn (result, temp);
3750
3751   label = gen_label_rtx ();
3752   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3753
3754   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3755                        0, OPTAB_LIB_WIDEN);
3756
3757   mask = wi::mask (logd, true, prec);
3758   temp = expand_binop (mode, ior_optab, temp,
3759                        immed_wide_int_const (mask, mode),
3760                        result, 1, OPTAB_LIB_WIDEN);
3761   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3762                        0, OPTAB_LIB_WIDEN);
3763   if (temp != result)
3764     emit_move_insn (result, temp);
3765   emit_label (label);
3766   return result;
3767 }
3768
3769 /* Expand signed division of OP0 by a power of two D in mode MODE.
3770    This routine is only called for positive values of D.  */
3771
3772 static rtx
3773 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3774 {
3775   rtx temp;
3776   rtx_code_label *label;
3777   int logd;
3778
3779   logd = floor_log2 (d);
3780
3781   if (d == 2
3782       && BRANCH_COST (optimize_insn_for_speed_p (),
3783                       false) >= 1)
3784     {
3785       temp = gen_reg_rtx (mode);
3786       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3787       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3788                            0, OPTAB_LIB_WIDEN);
3789       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3790     }
3791
3792 #ifdef HAVE_conditional_move
3793   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3794       >= 2)
3795     {
3796       rtx temp2;
3797
3798       start_sequence ();
3799       temp2 = copy_to_mode_reg (mode, op0);
3800       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3801                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3802       temp = force_reg (mode, temp);
3803
3804       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3805       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3806                                      mode, temp, temp2, mode, 0);
3807       if (temp2)
3808         {
3809           rtx_insn *seq = get_insns ();
3810           end_sequence ();
3811           emit_insn (seq);
3812           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3813         }
3814       end_sequence ();
3815     }
3816 #endif
3817
3818   if (BRANCH_COST (optimize_insn_for_speed_p (),
3819                    false) >= 2)
3820     {
3821       int ushift = GET_MODE_BITSIZE (mode) - logd;
3822
3823       temp = gen_reg_rtx (mode);
3824       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3825       if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3826           || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3827              > COSTS_N_INSNS (1))
3828         temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3829                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3830       else
3831         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3832                              ushift, NULL_RTX, 1);
3833       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3834                            0, OPTAB_LIB_WIDEN);
3835       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3836     }
3837
3838   label = gen_label_rtx ();
3839   temp = copy_to_mode_reg (mode, op0);
3840   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3841   expand_inc (temp, gen_int_mode (d - 1, mode));
3842   emit_label (label);
3843   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3844 }
3845 \f
3846 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3847    if that is convenient, and returning where the result is.
3848    You may request either the quotient or the remainder as the result;
3849    specify REM_FLAG nonzero to get the remainder.
3850
3851    CODE is the expression code for which kind of division this is;
3852    it controls how rounding is done.  MODE is the machine mode to use.
3853    UNSIGNEDP nonzero means do unsigned division.  */
3854
3855 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3856    and then correct it by or'ing in missing high bits
3857    if result of ANDI is nonzero.
3858    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3859    This could optimize to a bfexts instruction.
3860    But C doesn't use these operations, so their optimizations are
3861    left for later.  */
3862 /* ??? For modulo, we don't actually need the highpart of the first product,
3863    the low part will do nicely.  And for small divisors, the second multiply
3864    can also be a low-part only multiply or even be completely left out.
3865    E.g. to calculate the remainder of a division by 3 with a 32 bit
3866    multiply, multiply with 0x55555556 and extract the upper two bits;
3867    the result is exact for inputs up to 0x1fffffff.
3868    The input range can be reduced by using cross-sum rules.
3869    For odd divisors >= 3, the following table gives right shift counts
3870    so that if a number is shifted by an integer multiple of the given
3871    amount, the remainder stays the same:
3872    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3873    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3874    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3875    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3876    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3877
3878    Cross-sum rules for even numbers can be derived by leaving as many bits
3879    to the right alone as the divisor has zeros to the right.
3880    E.g. if x is an unsigned 32 bit number:
3881    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3882    */
3883
3884 rtx
3885 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3886                rtx op0, rtx op1, rtx target, int unsignedp)
3887 {
3888   machine_mode compute_mode;
3889   rtx tquotient;
3890   rtx quotient = 0, remainder = 0;
3891   rtx_insn *last;
3892   int size;
3893   rtx_insn *insn;
3894   optab optab1, optab2;
3895   int op1_is_constant, op1_is_pow2 = 0;
3896   int max_cost, extra_cost;
3897   static HOST_WIDE_INT last_div_const = 0;
3898   bool speed = optimize_insn_for_speed_p ();
3899
3900   op1_is_constant = CONST_INT_P (op1);
3901   if (op1_is_constant)
3902     {
3903       unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3904       if (unsignedp)
3905         ext_op1 &= GET_MODE_MASK (mode);
3906       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3907                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3908     }
3909
3910   /*
3911      This is the structure of expand_divmod:
3912
3913      First comes code to fix up the operands so we can perform the operations
3914      correctly and efficiently.
3915
3916      Second comes a switch statement with code specific for each rounding mode.
3917      For some special operands this code emits all RTL for the desired
3918      operation, for other cases, it generates only a quotient and stores it in
3919      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3920      to indicate that it has not done anything.
3921
3922      Last comes code that finishes the operation.  If QUOTIENT is set and
3923      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3924      QUOTIENT is not set, it is computed using trunc rounding.
3925
3926      We try to generate special code for division and remainder when OP1 is a
3927      constant.  If |OP1| = 2**n we can use shifts and some other fast
3928      operations.  For other values of OP1, we compute a carefully selected
3929      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3930      by m.
3931
3932      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3933      half of the product.  Different strategies for generating the product are
3934      implemented in expmed_mult_highpart.
3935
3936      If what we actually want is the remainder, we generate that by another
3937      by-constant multiplication and a subtraction.  */
3938
3939   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3940      code below will malfunction if we are, so check here and handle
3941      the special case if so.  */
3942   if (op1 == const1_rtx)
3943     return rem_flag ? const0_rtx : op0;
3944
3945     /* When dividing by -1, we could get an overflow.
3946      negv_optab can handle overflows.  */
3947   if (! unsignedp && op1 == constm1_rtx)
3948     {
3949       if (rem_flag)
3950         return const0_rtx;
3951       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3952                           ? negv_optab : neg_optab, op0, target, 0);
3953     }
3954
3955   if (target
3956       /* Don't use the function value register as a target
3957          since we have to read it as well as write it,
3958          and function-inlining gets confused by this.  */
3959       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3960           /* Don't clobber an operand while doing a multi-step calculation.  */
3961           || ((rem_flag || op1_is_constant)
3962               && (reg_mentioned_p (target, op0)
3963                   || (MEM_P (op0) && MEM_P (target))))
3964           || reg_mentioned_p (target, op1)
3965           || (MEM_P (op1) && MEM_P (target))))
3966     target = 0;
3967
3968   /* Get the mode in which to perform this computation.  Normally it will
3969      be MODE, but sometimes we can't do the desired operation in MODE.
3970      If so, pick a wider mode in which we can do the operation.  Convert
3971      to that mode at the start to avoid repeated conversions.
3972
3973      First see what operations we need.  These depend on the expression
3974      we are evaluating.  (We assume that divxx3 insns exist under the
3975      same conditions that modxx3 insns and that these insns don't normally
3976      fail.  If these assumptions are not correct, we may generate less
3977      efficient code in some cases.)
3978
3979      Then see if we find a mode in which we can open-code that operation
3980      (either a division, modulus, or shift).  Finally, check for the smallest
3981      mode for which we can do the operation with a library call.  */
3982
3983   /* We might want to refine this now that we have division-by-constant
3984      optimization.  Since expmed_mult_highpart tries so many variants, it is
3985      not straightforward to generalize this.  Maybe we should make an array
3986      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3987
3988   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3989             ? (unsignedp ? lshr_optab : ashr_optab)
3990             : (unsignedp ? udiv_optab : sdiv_optab));
3991   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3992             ? optab1
3993             : (unsignedp ? udivmod_optab : sdivmod_optab));
3994
3995   for (compute_mode = mode; compute_mode != VOIDmode;
3996        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3997     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3998         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3999       break;
4000
4001   if (compute_mode == VOIDmode)
4002     for (compute_mode = mode; compute_mode != VOIDmode;
4003          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4004       if (optab_libfunc (optab1, compute_mode)
4005           || optab_libfunc (optab2, compute_mode))
4006         break;
4007
4008   /* If we still couldn't find a mode, use MODE, but expand_binop will
4009      probably die.  */
4010   if (compute_mode == VOIDmode)
4011     compute_mode = mode;
4012
4013   if (target && GET_MODE (target) == compute_mode)
4014     tquotient = target;
4015   else
4016     tquotient = gen_reg_rtx (compute_mode);
4017
4018   size = GET_MODE_BITSIZE (compute_mode);
4019 #if 0
4020   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4021      (mode), and thereby get better code when OP1 is a constant.  Do that
4022      later.  It will require going over all usages of SIZE below.  */
4023   size = GET_MODE_BITSIZE (mode);
4024 #endif
4025
4026   /* Only deduct something for a REM if the last divide done was
4027      for a different constant.   Then set the constant of the last
4028      divide.  */
4029   max_cost = (unsignedp
4030               ? udiv_cost (speed, compute_mode)
4031               : sdiv_cost (speed, compute_mode));
4032   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4033                      && INTVAL (op1) == last_div_const))
4034     max_cost -= (mul_cost (speed, compute_mode)
4035                  + add_cost (speed, compute_mode));
4036
4037   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4038
4039   /* Now convert to the best mode to use.  */
4040   if (compute_mode != mode)
4041     {
4042       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4043       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4044
4045       /* convert_modes may have placed op1 into a register, so we
4046          must recompute the following.  */
4047       op1_is_constant = CONST_INT_P (op1);
4048       op1_is_pow2 = (op1_is_constant
4049                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4050                           || (! unsignedp
4051                               && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4052     }
4053
4054   /* If one of the operands is a volatile MEM, copy it into a register.  */
4055
4056   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4057     op0 = force_reg (compute_mode, op0);
4058   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4059     op1 = force_reg (compute_mode, op1);
4060
4061   /* If we need the remainder or if OP1 is constant, we need to
4062      put OP0 in a register in case it has any queued subexpressions.  */
4063   if (rem_flag || op1_is_constant)
4064     op0 = force_reg (compute_mode, op0);
4065
4066   last = get_last_insn ();
4067
4068   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4069   if (unsignedp)
4070     {
4071       if (code == FLOOR_DIV_EXPR)
4072         code = TRUNC_DIV_EXPR;
4073       if (code == FLOOR_MOD_EXPR)
4074         code = TRUNC_MOD_EXPR;
4075       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4076         code = TRUNC_DIV_EXPR;
4077     }
4078
4079   if (op1 != const0_rtx)
4080     switch (code)
4081       {
4082       case TRUNC_MOD_EXPR:
4083       case TRUNC_DIV_EXPR:
4084         if (op1_is_constant)
4085           {
4086             if (unsignedp)
4087               {
4088                 unsigned HOST_WIDE_INT mh, ml;
4089                 int pre_shift, post_shift;
4090                 int dummy;
4091                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4092                                             & GET_MODE_MASK (compute_mode));
4093
4094                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4095                   {
4096                     pre_shift = floor_log2 (d);
4097                     if (rem_flag)
4098                       {
4099                         unsigned HOST_WIDE_INT mask
4100                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4101                         remainder
4102                           = expand_binop (compute_mode, and_optab, op0,
4103                                           gen_int_mode (mask, compute_mode),
4104                                           remainder, 1,
4105                                           OPTAB_LIB_WIDEN);
4106                         if (remainder)
4107                           return gen_lowpart (mode, remainder);
4108                       }
4109                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4110                                              pre_shift, tquotient, 1);
4111                   }
4112                 else if (size <= HOST_BITS_PER_WIDE_INT)
4113                   {
4114                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4115                       {
4116                         /* Most significant bit of divisor is set; emit an scc
4117                            insn.  */
4118                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4119                                                           compute_mode, 1, 1);
4120                       }
4121                     else
4122                       {
4123                         /* Find a suitable multiplier and right shift count
4124                            instead of multiplying with D.  */
4125
4126                         mh = choose_multiplier (d, size, size,
4127                                                 &ml, &post_shift, &dummy);
4128
4129                         /* If the suggested multiplier is more than SIZE bits,
4130                            we can do better for even divisors, using an
4131                            initial right shift.  */
4132                         if (mh != 0 && (d & 1) == 0)
4133                           {
4134                             pre_shift = floor_log2 (d & -d);
4135                             mh = choose_multiplier (d >> pre_shift, size,
4136                                                     size - pre_shift,
4137                                                     &ml, &post_shift, &dummy);
4138                             gcc_assert (!mh);
4139                           }
4140                         else
4141                           pre_shift = 0;
4142
4143                         if (mh != 0)
4144                           {
4145                             rtx t1, t2, t3, t4;
4146
4147                             if (post_shift - 1 >= BITS_PER_WORD)
4148                               goto fail1;
4149
4150                             extra_cost
4151                               = (shift_cost (speed, compute_mode, post_shift - 1)
4152                                  + shift_cost (speed, compute_mode, 1)
4153                                  + 2 * add_cost (speed, compute_mode));
4154                             t1 = expmed_mult_highpart
4155                               (compute_mode, op0,
4156                                gen_int_mode (ml, compute_mode),
4157                                NULL_RTX, 1, max_cost - extra_cost);
4158                             if (t1 == 0)
4159                               goto fail1;
4160                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4161                                                                op0, t1),
4162                                                 NULL_RTX);
4163                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4164                                                t2, 1, NULL_RTX, 1);
4165                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4166                                                               t1, t3),
4167                                                 NULL_RTX);
4168                             quotient = expand_shift
4169                               (RSHIFT_EXPR, compute_mode, t4,
4170                                post_shift - 1, tquotient, 1);
4171                           }
4172                         else
4173                           {
4174                             rtx t1, t2;
4175
4176                             if (pre_shift >= BITS_PER_WORD
4177                                 || post_shift >= BITS_PER_WORD)
4178                               goto fail1;
4179
4180                             t1 = expand_shift
4181                               (RSHIFT_EXPR, compute_mode, op0,
4182                                pre_shift, NULL_RTX, 1);
4183                             extra_cost
4184                               = (shift_cost (speed, compute_mode, pre_shift)
4185                                  + shift_cost (speed, compute_mode, post_shift));
4186                             t2 = expmed_mult_highpart
4187                               (compute_mode, t1,
4188                                gen_int_mode (ml, compute_mode),
4189                                NULL_RTX, 1, max_cost - extra_cost);
4190                             if (t2 == 0)
4191                               goto fail1;
4192                             quotient = expand_shift
4193                               (RSHIFT_EXPR, compute_mode, t2,
4194                                post_shift, tquotient, 1);
4195                           }
4196                       }
4197                   }
4198                 else            /* Too wide mode to use tricky code */
4199                   break;
4200
4201                 insn = get_last_insn ();
4202                 if (insn != last)
4203                   set_dst_reg_note (insn, REG_EQUAL,
4204                                     gen_rtx_UDIV (compute_mode, op0, op1),
4205                                     quotient);
4206               }
4207             else                /* TRUNC_DIV, signed */
4208               {
4209                 unsigned HOST_WIDE_INT ml;
4210                 int lgup, post_shift;
4211                 rtx mlr;
4212                 HOST_WIDE_INT d = INTVAL (op1);
4213                 unsigned HOST_WIDE_INT abs_d;
4214
4215                 /* Since d might be INT_MIN, we have to cast to
4216                    unsigned HOST_WIDE_INT before negating to avoid
4217                    undefined signed overflow.  */
4218                 abs_d = (d >= 0
4219                          ? (unsigned HOST_WIDE_INT) d
4220                          : - (unsigned HOST_WIDE_INT) d);
4221
4222                 /* n rem d = n rem -d */
4223                 if (rem_flag && d < 0)
4224                   {
4225                     d = abs_d;
4226                     op1 = gen_int_mode (abs_d, compute_mode);
4227                   }
4228
4229                 if (d == 1)
4230                   quotient = op0;
4231                 else if (d == -1)
4232                   quotient = expand_unop (compute_mode, neg_optab, op0,
4233                                           tquotient, 0);
4234                 else if (HOST_BITS_PER_WIDE_INT >= size
4235                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4236                   {
4237                     /* This case is not handled correctly below.  */
4238                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4239                                                 compute_mode, 1, 1);
4240                     if (quotient == 0)
4241                       goto fail1;
4242                   }
4243                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4244                          && (rem_flag
4245                              ? smod_pow2_cheap (speed, compute_mode)
4246                              : sdiv_pow2_cheap (speed, compute_mode))
4247                          /* We assume that cheap metric is true if the
4248                             optab has an expander for this mode.  */
4249                          && ((optab_handler ((rem_flag ? smod_optab
4250                                               : sdiv_optab),
4251                                              compute_mode)
4252                               != CODE_FOR_nothing)
4253                              || (optab_handler (sdivmod_optab,
4254                                                 compute_mode)
4255                                  != CODE_FOR_nothing)))
4256                   ;
4257                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4258                   {
4259                     if (rem_flag)
4260                       {
4261                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4262                         if (remainder)
4263                           return gen_lowpart (mode, remainder);
4264                       }
4265
4266                     if (sdiv_pow2_cheap (speed, compute_mode)
4267                         && ((optab_handler (sdiv_optab, compute_mode)
4268                              != CODE_FOR_nothing)
4269                             || (optab_handler (sdivmod_optab, compute_mode)
4270                                 != CODE_FOR_nothing)))
4271                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4272                                                 compute_mode, op0,
4273                                                 gen_int_mode (abs_d,
4274                                                               compute_mode),
4275                                                 NULL_RTX, 0);
4276                     else
4277                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4278
4279                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4280                        negate the quotient.  */
4281                     if (d < 0)
4282                       {
4283                         insn = get_last_insn ();
4284                         if (insn != last
4285                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4286                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4287                           set_dst_reg_note (insn, REG_EQUAL,
4288                                             gen_rtx_DIV (compute_mode, op0,
4289                                                          gen_int_mode
4290                                                            (abs_d,
4291                                                             compute_mode)),
4292                                             quotient);
4293
4294                         quotient = expand_unop (compute_mode, neg_optab,
4295                                                 quotient, quotient, 0);
4296                       }
4297                   }
4298                 else if (size <= HOST_BITS_PER_WIDE_INT)
4299                   {
4300                     choose_multiplier (abs_d, size, size - 1,
4301                                        &ml, &post_shift, &lgup);
4302                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4303                       {
4304                         rtx t1, t2, t3;
4305
4306                         if (post_shift >= BITS_PER_WORD
4307                             || size - 1 >= BITS_PER_WORD)
4308                           goto fail1;
4309
4310                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4311                                       + shift_cost (speed, compute_mode, size - 1)
4312                                       + add_cost (speed, compute_mode));
4313                         t1 = expmed_mult_highpart
4314                           (compute_mode, op0, gen_int_mode (ml, compute_mode),
4315                            NULL_RTX, 0, max_cost - extra_cost);
4316                         if (t1 == 0)
4317                           goto fail1;
4318                         t2 = expand_shift
4319                           (RSHIFT_EXPR, compute_mode, t1,
4320                            post_shift, NULL_RTX, 0);
4321                         t3 = expand_shift
4322                           (RSHIFT_EXPR, compute_mode, op0,
4323                            size - 1, NULL_RTX, 0);
4324                         if (d < 0)
4325                           quotient
4326                             = force_operand (gen_rtx_MINUS (compute_mode,
4327                                                             t3, t2),
4328                                              tquotient);
4329                         else
4330                           quotient
4331                             = force_operand (gen_rtx_MINUS (compute_mode,
4332                                                             t2, t3),
4333                                              tquotient);
4334                       }
4335                     else
4336                       {
4337                         rtx t1, t2, t3, t4;
4338
4339                         if (post_shift >= BITS_PER_WORD
4340                             || size - 1 >= BITS_PER_WORD)
4341                           goto fail1;
4342
4343                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4344                         mlr = gen_int_mode (ml, compute_mode);
4345                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4346                                       + shift_cost (speed, compute_mode, size - 1)
4347                                       + 2 * add_cost (speed, compute_mode));
4348                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4349                                                    NULL_RTX, 0,
4350                                                    max_cost - extra_cost);
4351                         if (t1 == 0)
4352                           goto fail1;
4353                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4354                                                           t1, op0),
4355                                             NULL_RTX);
4356                         t3 = expand_shift
4357                           (RSHIFT_EXPR, compute_mode, t2,
4358                            post_shift, NULL_RTX, 0);
4359                         t4 = expand_shift
4360                           (RSHIFT_EXPR, compute_mode, op0,
4361                            size - 1, NULL_RTX, 0);
4362                         if (d < 0)
4363                           quotient
4364                             = force_operand (gen_rtx_MINUS (compute_mode,
4365                                                             t4, t3),
4366                                              tquotient);
4367                         else
4368                           quotient
4369                             = force_operand (gen_rtx_MINUS (compute_mode,
4370                                                             t3, t4),
4371                                              tquotient);
4372                       }
4373                   }
4374                 else            /* Too wide mode to use tricky code */
4375                   break;
4376
4377                 insn = get_last_insn ();
4378                 if (insn != last)
4379                   set_dst_reg_note (insn, REG_EQUAL,
4380                                     gen_rtx_DIV (compute_mode, op0, op1),
4381                                     quotient);
4382               }
4383             break;
4384           }
4385       fail1:
4386         delete_insns_since (last);
4387         break;
4388
4389       case FLOOR_DIV_EXPR:
4390       case FLOOR_MOD_EXPR:
4391       /* We will come here only for signed operations.  */
4392         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4393           {
4394             unsigned HOST_WIDE_INT mh, ml;
4395             int pre_shift, lgup, post_shift;
4396             HOST_WIDE_INT d = INTVAL (op1);
4397
4398             if (d > 0)
4399               {
4400                 /* We could just as easily deal with negative constants here,
4401                    but it does not seem worth the trouble for GCC 2.6.  */
4402                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4403                   {
4404                     pre_shift = floor_log2 (d);
4405                     if (rem_flag)
4406                       {
4407                         unsigned HOST_WIDE_INT mask
4408                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4409                         remainder = expand_binop
4410                           (compute_mode, and_optab, op0,
4411                            gen_int_mode (mask, compute_mode),
4412                            remainder, 0, OPTAB_LIB_WIDEN);
4413                         if (remainder)
4414                           return gen_lowpart (mode, remainder);
4415                       }
4416                     quotient = expand_shift
4417                       (RSHIFT_EXPR, compute_mode, op0,
4418                        pre_shift, tquotient, 0);
4419                   }
4420                 else
4421                   {
4422                     rtx t1, t2, t3, t4;
4423
4424                     mh = choose_multiplier (d, size, size - 1,
4425                                             &ml, &post_shift, &lgup);
4426                     gcc_assert (!mh);
4427
4428                     if (post_shift < BITS_PER_WORD
4429                         && size - 1 < BITS_PER_WORD)
4430                       {
4431                         t1 = expand_shift
4432                           (RSHIFT_EXPR, compute_mode, op0,
4433                            size - 1, NULL_RTX, 0);
4434                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4435                                            NULL_RTX, 0, OPTAB_WIDEN);
4436                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4437                                       + shift_cost (speed, compute_mode, size - 1)
4438                                       + 2 * add_cost (speed, compute_mode));
4439                         t3 = expmed_mult_highpart
4440                           (compute_mode, t2, gen_int_mode (ml, compute_mode),
4441                            NULL_RTX, 1, max_cost - extra_cost);
4442                         if (t3 != 0)
4443                           {
4444                             t4 = expand_shift
4445                               (RSHIFT_EXPR, compute_mode, t3,
4446                                post_shift, NULL_RTX, 1);
4447                             quotient = expand_binop (compute_mode, xor_optab,
4448                                                      t4, t1, tquotient, 0,
4449                                                      OPTAB_WIDEN);
4450                           }
4451                       }
4452                   }
4453               }
4454             else
4455               {
4456                 rtx nsign, t1, t2, t3, t4;
4457                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4458                                                   op0, constm1_rtx), NULL_RTX);
4459                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4460                                    0, OPTAB_WIDEN);
4461                 nsign = expand_shift
4462                   (RSHIFT_EXPR, compute_mode, t2,
4463                    size - 1, NULL_RTX, 0);
4464                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4465                                     NULL_RTX);
4466                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4467                                     NULL_RTX, 0);
4468                 if (t4)
4469                   {
4470                     rtx t5;
4471                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4472                                       NULL_RTX, 0);
4473                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4474                                                             t4, t5),
4475                                               tquotient);
4476                   }
4477               }
4478           }
4479
4480         if (quotient != 0)
4481           break;
4482         delete_insns_since (last);
4483
4484         /* Try using an instruction that produces both the quotient and
4485            remainder, using truncation.  We can easily compensate the quotient
4486            or remainder to get floor rounding, once we have the remainder.
4487            Notice that we compute also the final remainder value here,
4488            and return the result right away.  */
4489         if (target == 0 || GET_MODE (target) != compute_mode)
4490           target = gen_reg_rtx (compute_mode);
4491
4492         if (rem_flag)
4493           {
4494             remainder
4495               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4496             quotient = gen_reg_rtx (compute_mode);
4497           }
4498         else
4499           {
4500             quotient
4501               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4502             remainder = gen_reg_rtx (compute_mode);
4503           }
4504
4505         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4506                                  quotient, remainder, 0))
4507           {
4508             /* This could be computed with a branch-less sequence.
4509                Save that for later.  */
4510             rtx tem;
4511             rtx_code_label *label = gen_label_rtx ();
4512             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4513             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4514                                 NULL_RTX, 0, OPTAB_WIDEN);
4515             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4516             expand_dec (quotient, const1_rtx);
4517             expand_inc (remainder, op1);
4518             emit_label (label);
4519             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4520           }
4521
4522         /* No luck with division elimination or divmod.  Have to do it
4523            by conditionally adjusting op0 *and* the result.  */
4524         {
4525           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4526           rtx adjusted_op0;
4527           rtx tem;
4528
4529           quotient = gen_reg_rtx (compute_mode);
4530           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4531           label1 = gen_label_rtx ();
4532           label2 = gen_label_rtx ();
4533           label3 = gen_label_rtx ();
4534           label4 = gen_label_rtx ();
4535           label5 = gen_label_rtx ();
4536           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4537           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4538           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4539                               quotient, 0, OPTAB_LIB_WIDEN);
4540           if (tem != quotient)
4541             emit_move_insn (quotient, tem);
4542           emit_jump_insn (gen_jump (label5));
4543           emit_barrier ();
4544           emit_label (label1);
4545           expand_inc (adjusted_op0, const1_rtx);
4546           emit_jump_insn (gen_jump (label4));
4547           emit_barrier ();
4548           emit_label (label2);
4549           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4550           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4551                               quotient, 0, OPTAB_LIB_WIDEN);
4552           if (tem != quotient)
4553             emit_move_insn (quotient, tem);
4554           emit_jump_insn (gen_jump (label5));
4555           emit_barrier ();
4556           emit_label (label3);
4557           expand_dec (adjusted_op0, const1_rtx);
4558           emit_label (label4);
4559           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4560                               quotient, 0, OPTAB_LIB_WIDEN);
4561           if (tem != quotient)
4562             emit_move_insn (quotient, tem);
4563           expand_dec (quotient, const1_rtx);
4564           emit_label (label5);
4565         }
4566         break;
4567
4568       case CEIL_DIV_EXPR:
4569       case CEIL_MOD_EXPR:
4570         if (unsignedp)
4571           {
4572             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4573               {
4574                 rtx t1, t2, t3;
4575                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4576                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4577                                    floor_log2 (d), tquotient, 1);
4578                 t2 = expand_binop (compute_mode, and_optab, op0,
4579                                    gen_int_mode (d - 1, compute_mode),
4580                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4581                 t3 = gen_reg_rtx (compute_mode);
4582                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4583                                       compute_mode, 1, 1);
4584                 if (t3 == 0)
4585                   {
4586                     rtx_code_label *lab;
4587                     lab = gen_label_rtx ();
4588                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4589                     expand_inc (t1, const1_rtx);
4590                     emit_label (lab);
4591                     quotient = t1;
4592                   }
4593                 else
4594                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4595                                                           t1, t3),
4596                                             tquotient);
4597                 break;
4598               }
4599
4600             /* Try using an instruction that produces both the quotient and
4601                remainder, using truncation.  We can easily compensate the
4602                quotient or remainder to get ceiling rounding, once we have the
4603                remainder.  Notice that we compute also the final remainder
4604                value here, and return the result right away.  */
4605             if (target == 0 || GET_MODE (target) != compute_mode)
4606               target = gen_reg_rtx (compute_mode);
4607
4608             if (rem_flag)
4609               {
4610                 remainder = (REG_P (target)
4611                              ? target : gen_reg_rtx (compute_mode));
4612                 quotient = gen_reg_rtx (compute_mode);
4613               }
4614             else
4615               {
4616                 quotient = (REG_P (target)
4617                             ? target : gen_reg_rtx (compute_mode));
4618                 remainder = gen_reg_rtx (compute_mode);
4619               }
4620
4621             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4622                                      remainder, 1))
4623               {
4624                 /* This could be computed with a branch-less sequence.
4625                    Save that for later.  */
4626                 rtx_code_label *label = gen_label_rtx ();
4627                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4628                                  compute_mode, label);
4629                 expand_inc (quotient, const1_rtx);
4630                 expand_dec (remainder, op1);
4631                 emit_label (label);
4632                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4633               }
4634
4635             /* No luck with division elimination or divmod.  Have to do it
4636                by conditionally adjusting op0 *and* the result.  */
4637             {
4638               rtx_code_label *label1, *label2;
4639               rtx adjusted_op0, tem;
4640
4641               quotient = gen_reg_rtx (compute_mode);
4642               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4643               label1 = gen_label_rtx ();
4644               label2 = gen_label_rtx ();
4645               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4646                                compute_mode, label1);
4647               emit_move_insn  (quotient, const0_rtx);
4648               emit_jump_insn (gen_jump (label2));
4649               emit_barrier ();
4650               emit_label (label1);
4651               expand_dec (adjusted_op0, const1_rtx);
4652               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4653                                   quotient, 1, OPTAB_LIB_WIDEN);
4654               if (tem != quotient)
4655                 emit_move_insn (quotient, tem);
4656               expand_inc (quotient, const1_rtx);
4657               emit_label (label2);
4658             }
4659           }
4660         else /* signed */
4661           {
4662             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4663                 && INTVAL (op1) >= 0)
4664               {
4665                 /* This is extremely similar to the code for the unsigned case
4666                    above.  For 2.7 we should merge these variants, but for
4667                    2.6.1 I don't want to touch the code for unsigned since that
4668                    get used in C.  The signed case will only be used by other
4669                    languages (Ada).  */
4670
4671                 rtx t1, t2, t3;
4672                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4673                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4674                                    floor_log2 (d), tquotient, 0);
4675                 t2 = expand_binop (compute_mode, and_optab, op0,
4676                                    gen_int_mode (d - 1, compute_mode),
4677                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4678                 t3 = gen_reg_rtx (compute_mode);
4679                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4680                                       compute_mode, 1, 1);
4681                 if (t3 == 0)
4682                   {
4683                     rtx_code_label *lab;
4684                     lab = gen_label_rtx ();
4685                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4686                     expand_inc (t1, const1_rtx);
4687                     emit_label (lab);
4688                     quotient = t1;
4689                   }
4690                 else
4691                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4692                                                           t1, t3),
4693                                             tquotient);
4694                 break;
4695               }
4696
4697             /* Try using an instruction that produces both the quotient and
4698                remainder, using truncation.  We can easily compensate the
4699                quotient or remainder to get ceiling rounding, once we have the
4700                remainder.  Notice that we compute also the final remainder
4701                value here, and return the result right away.  */
4702             if (target == 0 || GET_MODE (target) != compute_mode)
4703               target = gen_reg_rtx (compute_mode);
4704             if (rem_flag)
4705               {
4706                 remainder= (REG_P (target)
4707                             ? target : gen_reg_rtx (compute_mode));
4708                 quotient = gen_reg_rtx (compute_mode);
4709               }
4710             else
4711               {
4712                 quotient = (REG_P (target)
4713                             ? target : gen_reg_rtx (compute_mode));
4714                 remainder = gen_reg_rtx (compute_mode);
4715               }
4716
4717             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4718                                      remainder, 0))
4719               {
4720                 /* This could be computed with a branch-less sequence.
4721                    Save that for later.  */
4722                 rtx tem;
4723                 rtx_code_label *label = gen_label_rtx ();
4724                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4725                                  compute_mode, label);
4726                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4727                                     NULL_RTX, 0, OPTAB_WIDEN);
4728                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4729                 expand_inc (quotient, const1_rtx);
4730                 expand_dec (remainder, op1);
4731                 emit_label (label);
4732                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4733               }
4734
4735             /* No luck with division elimination or divmod.  Have to do it
4736                by conditionally adjusting op0 *and* the result.  */
4737             {
4738               rtx_code_label *label1, *label2, *label3, *label4, *label5;
4739               rtx adjusted_op0;
4740               rtx tem;
4741
4742               quotient = gen_reg_rtx (compute_mode);
4743               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4744               label1 = gen_label_rtx ();
4745               label2 = gen_label_rtx ();
4746               label3 = gen_label_rtx ();
4747               label4 = gen_label_rtx ();
4748               label5 = gen_label_rtx ();
4749               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4750               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4751                                compute_mode, label1);
4752               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4753                                   quotient, 0, OPTAB_LIB_WIDEN);
4754               if (tem != quotient)
4755                 emit_move_insn (quotient, tem);
4756               emit_jump_insn (gen_jump (label5));
4757               emit_barrier ();
4758               emit_label (label1);
4759               expand_dec (adjusted_op0, const1_rtx);
4760               emit_jump_insn (gen_jump (label4));
4761               emit_barrier ();
4762               emit_label (label2);
4763               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4764                                compute_mode, label3);
4765               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4766                                   quotient, 0, OPTAB_LIB_WIDEN);
4767               if (tem != quotient)
4768                 emit_move_insn (quotient, tem);
4769               emit_jump_insn (gen_jump (label5));
4770               emit_barrier ();
4771               emit_label (label3);
4772               expand_inc (adjusted_op0, const1_rtx);
4773               emit_label (label4);
4774               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4775                                   quotient, 0, OPTAB_LIB_WIDEN);
4776               if (tem != quotient)
4777                 emit_move_insn (quotient, tem);
4778               expand_inc (quotient, const1_rtx);
4779               emit_label (label5);
4780             }
4781           }
4782         break;
4783
4784       case EXACT_DIV_EXPR:
4785         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4786           {
4787             HOST_WIDE_INT d = INTVAL (op1);
4788             unsigned HOST_WIDE_INT ml;
4789             int pre_shift;
4790             rtx t1;
4791
4792             pre_shift = floor_log2 (d & -d);
4793             ml = invert_mod2n (d >> pre_shift, size);
4794             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4795                                pre_shift, NULL_RTX, unsignedp);
4796             quotient = expand_mult (compute_mode, t1,
4797                                     gen_int_mode (ml, compute_mode),
4798                                     NULL_RTX, 1);
4799
4800             insn = get_last_insn ();
4801             set_dst_reg_note (insn, REG_EQUAL,
4802                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4803                                               compute_mode, op0, op1),
4804                               quotient);
4805           }
4806         break;
4807
4808       case ROUND_DIV_EXPR:
4809       case ROUND_MOD_EXPR:
4810         if (unsignedp)
4811           {
4812             rtx tem;
4813             rtx_code_label *label;
4814             label = gen_label_rtx ();
4815             quotient = gen_reg_rtx (compute_mode);
4816             remainder = gen_reg_rtx (compute_mode);
4817             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4818               {
4819                 rtx tem;
4820                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4821                                          quotient, 1, OPTAB_LIB_WIDEN);
4822                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4823                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4824                                           remainder, 1, OPTAB_LIB_WIDEN);
4825               }
4826             tem = plus_constant (compute_mode, op1, -1);
4827             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4828             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4829             expand_inc (quotient, const1_rtx);
4830             expand_dec (remainder, op1);
4831             emit_label (label);
4832           }
4833         else
4834           {
4835             rtx abs_rem, abs_op1, tem, mask;
4836             rtx_code_label *label;
4837             label = gen_label_rtx ();
4838             quotient = gen_reg_rtx (compute_mode);
4839             remainder = gen_reg_rtx (compute_mode);
4840             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4841               {
4842                 rtx tem;
4843                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4844                                          quotient, 0, OPTAB_LIB_WIDEN);
4845                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4846                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4847                                           remainder, 0, OPTAB_LIB_WIDEN);
4848               }
4849             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4850             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4851             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4852                                 1, NULL_RTX, 1);
4853             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4854             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4855                                 NULL_RTX, 0, OPTAB_WIDEN);
4856             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4857                                  size - 1, NULL_RTX, 0);
4858             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4859                                 NULL_RTX, 0, OPTAB_WIDEN);
4860             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4861                                 NULL_RTX, 0, OPTAB_WIDEN);
4862             expand_inc (quotient, tem);
4863             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4864                                 NULL_RTX, 0, OPTAB_WIDEN);
4865             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4866                                 NULL_RTX, 0, OPTAB_WIDEN);
4867             expand_dec (remainder, tem);
4868             emit_label (label);
4869           }
4870         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4871
4872       default:
4873         gcc_unreachable ();
4874       }
4875
4876   if (quotient == 0)
4877     {
4878       if (target && GET_MODE (target) != compute_mode)
4879         target = 0;
4880
4881       if (rem_flag)
4882         {
4883           /* Try to produce the remainder without producing the quotient.
4884              If we seem to have a divmod pattern that does not require widening,
4885              don't try widening here.  We should really have a WIDEN argument
4886              to expand_twoval_binop, since what we'd really like to do here is
4887              1) try a mod insn in compute_mode
4888              2) try a divmod insn in compute_mode
4889              3) try a div insn in compute_mode and multiply-subtract to get
4890                 remainder
4891              4) try the same things with widening allowed.  */
4892           remainder
4893             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4894                                  op0, op1, target,
4895                                  unsignedp,
4896                                  ((optab_handler (optab2, compute_mode)
4897                                    != CODE_FOR_nothing)
4898                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4899           if (remainder == 0)
4900             {
4901               /* No luck there.  Can we do remainder and divide at once
4902                  without a library call?  */
4903               remainder = gen_reg_rtx (compute_mode);
4904               if (! expand_twoval_binop ((unsignedp
4905                                           ? udivmod_optab
4906                                           : sdivmod_optab),
4907                                          op0, op1,
4908                                          NULL_RTX, remainder, unsignedp))
4909                 remainder = 0;
4910             }
4911
4912           if (remainder)
4913             return gen_lowpart (mode, remainder);
4914         }
4915
4916       /* Produce the quotient.  Try a quotient insn, but not a library call.
4917          If we have a divmod in this mode, use it in preference to widening
4918          the div (for this test we assume it will not fail). Note that optab2
4919          is set to the one of the two optabs that the call below will use.  */
4920       quotient
4921         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4922                              op0, op1, rem_flag ? NULL_RTX : target,
4923                              unsignedp,
4924                              ((optab_handler (optab2, compute_mode)
4925                                != CODE_FOR_nothing)
4926                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4927
4928       if (quotient == 0)
4929         {
4930           /* No luck there.  Try a quotient-and-remainder insn,
4931              keeping the quotient alone.  */
4932           quotient = gen_reg_rtx (compute_mode);
4933           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4934                                      op0, op1,
4935                                      quotient, NULL_RTX, unsignedp))
4936             {
4937               quotient = 0;
4938               if (! rem_flag)
4939                 /* Still no luck.  If we are not computing the remainder,
4940                    use a library call for the quotient.  */
4941                 quotient = sign_expand_binop (compute_mode,
4942                                               udiv_optab, sdiv_optab,
4943                                               op0, op1, target,
4944                                               unsignedp, OPTAB_LIB_WIDEN);
4945             }
4946         }
4947     }
4948
4949   if (rem_flag)
4950     {
4951       if (target && GET_MODE (target) != compute_mode)
4952         target = 0;
4953
4954       if (quotient == 0)
4955         {
4956           /* No divide instruction either.  Use library for remainder.  */
4957           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4958                                          op0, op1, target,
4959                                          unsignedp, OPTAB_LIB_WIDEN);
4960           /* No remainder function.  Try a quotient-and-remainder
4961              function, keeping the remainder.  */
4962           if (!remainder)
4963             {
4964               remainder = gen_reg_rtx (compute_mode);
4965               if (!expand_twoval_binop_libfunc
4966                   (unsignedp ? udivmod_optab : sdivmod_optab,
4967                    op0, op1,
4968                    NULL_RTX, remainder,
4969                    unsignedp ? UMOD : MOD))
4970                 remainder = NULL_RTX;
4971             }
4972         }
4973       else
4974         {
4975           /* We divided.  Now finish doing X - Y * (X / Y).  */
4976           remainder = expand_mult (compute_mode, quotient, op1,
4977                                    NULL_RTX, unsignedp);
4978           remainder = expand_binop (compute_mode, sub_optab, op0,
4979                                     remainder, target, unsignedp,
4980                                     OPTAB_LIB_WIDEN);
4981         }
4982     }
4983
4984   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4985 }
4986 \f
4987 /* Return a tree node with data type TYPE, describing the value of X.
4988    Usually this is an VAR_DECL, if there is no obvious better choice.
4989    X may be an expression, however we only support those expressions
4990    generated by loop.c.  */
4991
4992 tree
4993 make_tree (tree type, rtx x)
4994 {
4995   tree t;
4996
4997   switch (GET_CODE (x))
4998     {
4999     case CONST_INT:
5000     case CONST_WIDE_INT:
5001       t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
5002       return t;
5003
5004     case CONST_DOUBLE:
5005       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5006       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5007         t = wide_int_to_tree (type,
5008                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5009                                                     HOST_BITS_PER_WIDE_INT * 2));
5010       else
5011         {
5012           REAL_VALUE_TYPE d;
5013
5014           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5015           t = build_real (type, d);
5016         }
5017
5018       return t;
5019
5020     case CONST_VECTOR:
5021       {
5022         int units = CONST_VECTOR_NUNITS (x);
5023         tree itype = TREE_TYPE (type);
5024         tree *elts;
5025         int i;
5026
5027         /* Build a tree with vector elements.  */
5028         elts = XALLOCAVEC (tree, units);
5029         for (i = units - 1; i >= 0; --i)
5030           {
5031             rtx elt = CONST_VECTOR_ELT (x, i);
5032             elts[i] = make_tree (itype, elt);
5033           }
5034
5035         return build_vector (type, elts);
5036       }
5037
5038     case PLUS:
5039       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5040                           make_tree (type, XEXP (x, 1)));
5041
5042     case MINUS:
5043       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5044                           make_tree (type, XEXP (x, 1)));
5045
5046     case NEG:
5047       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5048
5049     case MULT:
5050       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5051                           make_tree (type, XEXP (x, 1)));
5052
5053     case ASHIFT:
5054       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5055                           make_tree (type, XEXP (x, 1)));
5056
5057     case LSHIFTRT:
5058       t = unsigned_type_for (type);
5059       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5060                                          make_tree (t, XEXP (x, 0)),
5061                                          make_tree (type, XEXP (x, 1))));
5062
5063     case ASHIFTRT:
5064       t = signed_type_for (type);
5065       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5066                                          make_tree (t, XEXP (x, 0)),
5067                                          make_tree (type, XEXP (x, 1))));
5068
5069     case DIV:
5070       if (TREE_CODE (type) != REAL_TYPE)
5071         t = signed_type_for (type);
5072       else
5073         t = type;
5074
5075       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5076                                          make_tree (t, XEXP (x, 0)),
5077                                          make_tree (t, XEXP (x, 1))));
5078     case UDIV:
5079       t = unsigned_type_for (type);
5080       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5081                                          make_tree (t, XEXP (x, 0)),
5082                                          make_tree (t, XEXP (x, 1))));
5083
5084     case SIGN_EXTEND:
5085     case ZERO_EXTEND:
5086       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5087                                           GET_CODE (x) == ZERO_EXTEND);
5088       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5089
5090     case CONST:
5091       return make_tree (type, XEXP (x, 0));
5092
5093     case SYMBOL_REF:
5094       t = SYMBOL_REF_DECL (x);
5095       if (t)
5096         return fold_convert (type, build_fold_addr_expr (t));
5097       /* else fall through.  */
5098
5099     default:
5100       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5101
5102       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5103          address mode to pointer mode.  */
5104       if (POINTER_TYPE_P (type))
5105         x = convert_memory_address_addr_space
5106               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5107
5108       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5109          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5110       t->decl_with_rtl.rtl = x;
5111
5112       return t;
5113     }
5114 }
5115 \f
5116 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5117    and returning TARGET.
5118
5119    If TARGET is 0, a pseudo-register or constant is returned.  */
5120
5121 rtx
5122 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5123 {
5124   rtx tem = 0;
5125
5126   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5127     tem = simplify_binary_operation (AND, mode, op0, op1);
5128   if (tem == 0)
5129     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5130
5131   if (target == 0)
5132     target = tem;
5133   else if (tem != target)
5134     emit_move_insn (target, tem);
5135   return target;
5136 }
5137
5138 /* Helper function for emit_store_flag.  */
5139 rtx
5140 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5141              machine_mode mode, machine_mode compare_mode,
5142              int unsignedp, rtx x, rtx y, int normalizep,
5143              machine_mode target_mode)
5144 {
5145   struct expand_operand ops[4];
5146   rtx op0, comparison, subtarget;
5147   rtx_insn *last;
5148   machine_mode result_mode = targetm.cstore_mode (icode);
5149
5150   last = get_last_insn ();
5151   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5152   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5153   if (!x || !y)
5154     {
5155       delete_insns_since (last);
5156       return NULL_RTX;
5157     }
5158
5159   if (target_mode == VOIDmode)
5160     target_mode = result_mode;
5161   if (!target)
5162     target = gen_reg_rtx (target_mode);
5163
5164   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5165
5166   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5167   create_fixed_operand (&ops[1], comparison);
5168   create_fixed_operand (&ops[2], x);
5169   create_fixed_operand (&ops[3], y);
5170   if (!maybe_expand_insn (icode, 4, ops))
5171     {
5172       delete_insns_since (last);
5173       return NULL_RTX;
5174     }
5175   subtarget = ops[0].value;
5176
5177   /* If we are converting to a wider mode, first convert to
5178      TARGET_MODE, then normalize.  This produces better combining
5179      opportunities on machines that have a SIGN_EXTRACT when we are
5180      testing a single bit.  This mostly benefits the 68k.
5181
5182      If STORE_FLAG_VALUE does not have the sign bit set when
5183      interpreted in MODE, we can do this conversion as unsigned, which
5184      is usually more efficient.  */
5185   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5186     {
5187       convert_move (target, subtarget,
5188                     val_signbit_known_clear_p (result_mode,
5189                                                STORE_FLAG_VALUE));
5190       op0 = target;
5191       result_mode = target_mode;
5192     }
5193   else
5194     op0 = subtarget;
5195
5196   /* If we want to keep subexpressions around, don't reuse our last
5197      target.  */
5198   if (optimize)
5199     subtarget = 0;
5200
5201   /* Now normalize to the proper value in MODE.  Sometimes we don't
5202      have to do anything.  */
5203   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5204     ;
5205   /* STORE_FLAG_VALUE might be the most negative number, so write
5206      the comparison this way to avoid a compiler-time warning.  */
5207   else if (- normalizep == STORE_FLAG_VALUE)
5208     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5209
5210   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5211      it hard to use a value of just the sign bit due to ANSI integer
5212      constant typing rules.  */
5213   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5214     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5215                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5216                         normalizep == 1);
5217   else
5218     {
5219       gcc_assert (STORE_FLAG_VALUE & 1);
5220
5221       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5222       if (normalizep == -1)
5223         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5224     }
5225
5226   /* If we were converting to a smaller mode, do the conversion now.  */
5227   if (target_mode != result_mode)
5228     {
5229       convert_move (target, op0, 0);
5230       return target;
5231     }
5232   else
5233     return op0;
5234 }
5235
5236
5237 /* A subroutine of emit_store_flag only including "tricks" that do not
5238    need a recursive call.  These are kept separate to avoid infinite
5239    loops.  */
5240
5241 static rtx
5242 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5243                    machine_mode mode, int unsignedp, int normalizep,
5244                    machine_mode target_mode)
5245 {
5246   rtx subtarget;
5247   enum insn_code icode;
5248   machine_mode compare_mode;
5249   enum mode_class mclass;
5250   enum rtx_code scode;
5251   rtx tem;
5252
5253   if (unsignedp)
5254     code = unsigned_condition (code);
5255   scode = swap_condition (code);
5256
5257   /* If one operand is constant, make it the second one.  Only do this
5258      if the other operand is not constant as well.  */
5259
5260   if (swap_commutative_operands_p (op0, op1))
5261     {
5262       tem = op0;
5263       op0 = op1;
5264       op1 = tem;
5265       code = swap_condition (code);
5266     }
5267
5268   if (mode == VOIDmode)
5269     mode = GET_MODE (op0);
5270
5271   /* For some comparisons with 1 and -1, we can convert this to
5272      comparisons with zero.  This will often produce more opportunities for
5273      store-flag insns.  */
5274
5275   switch (code)
5276     {
5277     case LT:
5278       if (op1 == const1_rtx)
5279         op1 = const0_rtx, code = LE;
5280       break;
5281     case LE:
5282       if (op1 == constm1_rtx)
5283         op1 = const0_rtx, code = LT;
5284       break;
5285     case GE:
5286       if (op1 == const1_rtx)
5287         op1 = const0_rtx, code = GT;
5288       break;
5289     case GT:
5290       if (op1 == constm1_rtx)
5291         op1 = const0_rtx, code = GE;
5292       break;
5293     case GEU:
5294       if (op1 == const1_rtx)
5295         op1 = const0_rtx, code = NE;
5296       break;
5297     case LTU:
5298       if (op1 == const1_rtx)
5299         op1 = const0_rtx, code = EQ;
5300       break;
5301     default:
5302       break;
5303     }
5304
5305   /* If we are comparing a double-word integer with zero or -1, we can
5306      convert the comparison into one involving a single word.  */
5307   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5308       && GET_MODE_CLASS (mode) == MODE_INT
5309       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5310     {
5311       if ((code == EQ || code == NE)
5312           && (op1 == const0_rtx || op1 == constm1_rtx))
5313         {
5314           rtx op00, op01;
5315
5316           /* Do a logical OR or AND of the two words and compare the
5317              result.  */
5318           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5319           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5320           tem = expand_binop (word_mode,
5321                               op1 == const0_rtx ? ior_optab : and_optab,
5322                               op00, op01, NULL_RTX, unsignedp,
5323                               OPTAB_DIRECT);
5324
5325           if (tem != 0)
5326             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5327                                    unsignedp, normalizep);
5328         }
5329       else if ((code == LT || code == GE) && op1 == const0_rtx)
5330         {
5331           rtx op0h;
5332
5333           /* If testing the sign bit, can just test on high word.  */
5334           op0h = simplify_gen_subreg (word_mode, op0, mode,
5335                                       subreg_highpart_offset (word_mode,
5336                                                               mode));
5337           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5338                                  unsignedp, normalizep);
5339         }
5340       else
5341         tem = NULL_RTX;
5342
5343       if (tem)
5344         {
5345           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5346             return tem;
5347           if (!target)
5348             target = gen_reg_rtx (target_mode);
5349
5350           convert_move (target, tem,
5351                         !val_signbit_known_set_p (word_mode,
5352                                                   (normalizep ? normalizep
5353                                                    : STORE_FLAG_VALUE)));
5354           return target;
5355         }
5356     }
5357
5358   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5359      complement of A (for GE) and shifting the sign bit to the low bit.  */
5360   if (op1 == const0_rtx && (code == LT || code == GE)
5361       && GET_MODE_CLASS (mode) == MODE_INT
5362       && (normalizep || STORE_FLAG_VALUE == 1
5363           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5364     {
5365       subtarget = target;
5366
5367       if (!target)
5368         target_mode = mode;
5369
5370       /* If the result is to be wider than OP0, it is best to convert it
5371          first.  If it is to be narrower, it is *incorrect* to convert it
5372          first.  */
5373       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5374         {
5375           op0 = convert_modes (target_mode, mode, op0, 0);
5376           mode = target_mode;
5377         }
5378
5379       if (target_mode != mode)
5380         subtarget = 0;
5381
5382       if (code == GE)
5383         op0 = expand_unop (mode, one_cmpl_optab, op0,
5384                            ((STORE_FLAG_VALUE == 1 || normalizep)
5385                             ? 0 : subtarget), 0);
5386
5387       if (STORE_FLAG_VALUE == 1 || normalizep)
5388         /* If we are supposed to produce a 0/1 value, we want to do
5389            a logical shift from the sign bit to the low-order bit; for
5390            a -1/0 value, we do an arithmetic shift.  */
5391         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5392                             GET_MODE_BITSIZE (mode) - 1,
5393                             subtarget, normalizep != -1);
5394
5395       if (mode != target_mode)
5396         op0 = convert_modes (target_mode, mode, op0, 0);
5397
5398       return op0;
5399     }
5400
5401   mclass = GET_MODE_CLASS (mode);
5402   for (compare_mode = mode; compare_mode != VOIDmode;
5403        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5404     {
5405      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5406      icode = optab_handler (cstore_optab, optab_mode);
5407      if (icode != CODE_FOR_nothing)
5408         {
5409           do_pending_stack_adjust ();
5410           tem = emit_cstore (target, icode, code, mode, compare_mode,
5411                              unsignedp, op0, op1, normalizep, target_mode);
5412           if (tem)
5413             return tem;
5414
5415           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5416             {
5417               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5418                                  unsignedp, op1, op0, normalizep, target_mode);
5419               if (tem)
5420                 return tem;
5421             }
5422           break;
5423         }
5424     }
5425
5426   return 0;
5427 }
5428
5429 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5430    and storing in TARGET.  Normally return TARGET.
5431    Return 0 if that cannot be done.
5432
5433    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5434    it is VOIDmode, they cannot both be CONST_INT.
5435
5436    UNSIGNEDP is for the case where we have to widen the operands
5437    to perform the operation.  It says to use zero-extension.
5438
5439    NORMALIZEP is 1 if we should convert the result to be either zero
5440    or one.  Normalize is -1 if we should convert the result to be
5441    either zero or -1.  If NORMALIZEP is zero, the result will be left
5442    "raw" out of the scc insn.  */
5443
5444 rtx
5445 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5446                  machine_mode mode, int unsignedp, int normalizep)
5447 {
5448   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5449   enum rtx_code rcode;
5450   rtx subtarget;
5451   rtx tem, trueval;
5452   rtx_insn *last;
5453
5454   /* If we compare constants, we shouldn't use a store-flag operation,
5455      but a constant load.  We can get there via the vanilla route that
5456      usually generates a compare-branch sequence, but will in this case
5457      fold the comparison to a constant, and thus elide the branch.  */
5458   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5459     return NULL_RTX;
5460
5461   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5462                            target_mode);
5463   if (tem)
5464     return tem;
5465
5466   /* If we reached here, we can't do this with a scc insn, however there
5467      are some comparisons that can be done in other ways.  Don't do any
5468      of these cases if branches are very cheap.  */
5469   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5470     return 0;
5471
5472   /* See what we need to return.  We can only return a 1, -1, or the
5473      sign bit.  */
5474
5475   if (normalizep == 0)
5476     {
5477       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5478         normalizep = STORE_FLAG_VALUE;
5479
5480       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5481         ;
5482       else
5483         return 0;
5484     }
5485
5486   last = get_last_insn ();
5487
5488   /* If optimizing, use different pseudo registers for each insn, instead
5489      of reusing the same pseudo.  This leads to better CSE, but slows
5490      down the compiler, since there are more pseudos */
5491   subtarget = (!optimize
5492                && (target_mode == mode)) ? target : NULL_RTX;
5493   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5494
5495   /* For floating-point comparisons, try the reverse comparison or try
5496      changing the "orderedness" of the comparison.  */
5497   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5498     {
5499       enum rtx_code first_code;
5500       bool and_them;
5501
5502       rcode = reverse_condition_maybe_unordered (code);
5503       if (can_compare_p (rcode, mode, ccp_store_flag)
5504           && (code == ORDERED || code == UNORDERED
5505               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5506               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5507         {
5508           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5509                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5510
5511           /* For the reverse comparison, use either an addition or a XOR.  */
5512           if (want_add
5513               && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5514                            optimize_insn_for_speed_p ()) == 0)
5515             {
5516               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5517                                        STORE_FLAG_VALUE, target_mode);
5518               if (tem)
5519                 return expand_binop (target_mode, add_optab, tem,
5520                                      gen_int_mode (normalizep, target_mode),
5521                                      target, 0, OPTAB_WIDEN);
5522             }
5523           else if (!want_add
5524                    && rtx_cost (trueval, XOR, 1,
5525                                 optimize_insn_for_speed_p ()) == 0)
5526             {
5527               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5528                                        normalizep, target_mode);
5529               if (tem)
5530                 return expand_binop (target_mode, xor_optab, tem, trueval,
5531                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5532             }
5533         }
5534
5535       delete_insns_since (last);
5536
5537       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5538       if (code == ORDERED || code == UNORDERED)
5539         return 0;
5540
5541       and_them = split_comparison (code, mode, &first_code, &code);
5542
5543       /* If there are no NaNs, the first comparison should always fall through.
5544          Effectively change the comparison to the other one.  */
5545       if (!HONOR_NANS (mode))
5546         {
5547           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5548           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5549                                     target_mode);
5550         }
5551
5552 #ifdef HAVE_conditional_move
5553       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5554          conditional move.  */
5555       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5556                                normalizep, target_mode);
5557       if (tem == 0)
5558         return 0;
5559
5560       if (and_them)
5561         tem = emit_conditional_move (target, code, op0, op1, mode,
5562                                      tem, const0_rtx, GET_MODE (tem), 0);
5563       else
5564         tem = emit_conditional_move (target, code, op0, op1, mode,
5565                                      trueval, tem, GET_MODE (tem), 0);
5566
5567       if (tem == 0)
5568         delete_insns_since (last);
5569       return tem;
5570 #else
5571       return 0;
5572 #endif
5573     }
5574
5575   /* The remaining tricks only apply to integer comparisons.  */
5576
5577   if (GET_MODE_CLASS (mode) != MODE_INT)
5578     return 0;
5579
5580   /* If this is an equality comparison of integers, we can try to exclusive-or
5581      (or subtract) the two operands and use a recursive call to try the
5582      comparison with zero.  Don't do any of these cases if branches are
5583      very cheap.  */
5584
5585   if ((code == EQ || code == NE) && op1 != const0_rtx)
5586     {
5587       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5588                           OPTAB_WIDEN);
5589
5590       if (tem == 0)
5591         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5592                             OPTAB_WIDEN);
5593       if (tem != 0)
5594         tem = emit_store_flag (target, code, tem, const0_rtx,
5595                                mode, unsignedp, normalizep);
5596       if (tem != 0)
5597         return tem;
5598
5599       delete_insns_since (last);
5600     }
5601
5602   /* For integer comparisons, try the reverse comparison.  However, for
5603      small X and if we'd have anyway to extend, implementing "X != 0"
5604      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5605   rcode = reverse_condition (code);
5606   if (can_compare_p (rcode, mode, ccp_store_flag)
5607       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5608             && code == NE
5609             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5610             && op1 == const0_rtx))
5611     {
5612       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5613                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5614
5615       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5616       if (want_add
5617           && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5618                        optimize_insn_for_speed_p ()) == 0)
5619         {
5620           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5621                                    STORE_FLAG_VALUE, target_mode);
5622           if (tem != 0)
5623             tem = expand_binop (target_mode, add_optab, tem,
5624                                 gen_int_mode (normalizep, target_mode),
5625                                 target, 0, OPTAB_WIDEN);
5626         }
5627       else if (!want_add
5628                && rtx_cost (trueval, XOR, 1,
5629                             optimize_insn_for_speed_p ()) == 0)
5630         {
5631           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5632                                    normalizep, target_mode);
5633           if (tem != 0)
5634             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5635                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5636         }
5637
5638       if (tem != 0)
5639         return tem;
5640       delete_insns_since (last);
5641     }
5642
5643   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5644      the constant zero.  Reject all other comparisons at this point.  Only
5645      do LE and GT if branches are expensive since they are expensive on
5646      2-operand machines.  */
5647
5648   if (op1 != const0_rtx
5649       || (code != EQ && code != NE
5650           && (BRANCH_COST (optimize_insn_for_speed_p (),
5651                            false) <= 1 || (code != LE && code != GT))))
5652     return 0;
5653
5654   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5655      do the necessary operation below.  */
5656
5657   tem = 0;
5658
5659   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5660      the sign bit set.  */
5661
5662   if (code == LE)
5663     {
5664       /* This is destructive, so SUBTARGET can't be OP0.  */
5665       if (rtx_equal_p (subtarget, op0))
5666         subtarget = 0;
5667
5668       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5669                           OPTAB_WIDEN);
5670       if (tem)
5671         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5672                             OPTAB_WIDEN);
5673     }
5674
5675   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5676      number of bits in the mode of OP0, minus one.  */
5677
5678   if (code == GT)
5679     {
5680       if (rtx_equal_p (subtarget, op0))
5681         subtarget = 0;
5682
5683       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5684                           GET_MODE_BITSIZE (mode) - 1,
5685                           subtarget, 0);
5686       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5687                           OPTAB_WIDEN);
5688     }
5689
5690   if (code == EQ || code == NE)
5691     {
5692       /* For EQ or NE, one way to do the comparison is to apply an operation
5693          that converts the operand into a positive number if it is nonzero
5694          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5695          for NE we negate.  This puts the result in the sign bit.  Then we
5696          normalize with a shift, if needed.
5697
5698          Two operations that can do the above actions are ABS and FFS, so try
5699          them.  If that doesn't work, and MODE is smaller than a full word,
5700          we can use zero-extension to the wider mode (an unsigned conversion)
5701          as the operation.  */
5702
5703       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5704          that is compensated by the subsequent overflow when subtracting
5705          one / negating.  */
5706
5707       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5708         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5709       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5710         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5711       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5712         {
5713           tem = convert_modes (word_mode, mode, op0, 1);
5714           mode = word_mode;
5715         }
5716
5717       if (tem != 0)
5718         {
5719           if (code == EQ)
5720             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5721                                 0, OPTAB_WIDEN);
5722           else
5723             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5724         }
5725
5726       /* If we couldn't do it that way, for NE we can "or" the two's complement
5727          of the value with itself.  For EQ, we take the one's complement of
5728          that "or", which is an extra insn, so we only handle EQ if branches
5729          are expensive.  */
5730
5731       if (tem == 0
5732           && (code == NE
5733               || BRANCH_COST (optimize_insn_for_speed_p (),
5734                               false) > 1))
5735         {
5736           if (rtx_equal_p (subtarget, op0))
5737             subtarget = 0;
5738
5739           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5740           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5741                               OPTAB_WIDEN);
5742
5743           if (tem && code == EQ)
5744             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5745         }
5746     }
5747
5748   if (tem && normalizep)
5749     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5750                         GET_MODE_BITSIZE (mode) - 1,
5751                         subtarget, normalizep == 1);
5752
5753   if (tem)
5754     {
5755       if (!target)
5756         ;
5757       else if (GET_MODE (tem) != target_mode)
5758         {
5759           convert_move (target, tem, 0);
5760           tem = target;
5761         }
5762       else if (!subtarget)
5763         {
5764           emit_move_insn (target, tem);
5765           tem = target;
5766         }
5767     }
5768   else
5769     delete_insns_since (last);
5770
5771   return tem;
5772 }
5773
5774 /* Like emit_store_flag, but always succeeds.  */
5775
5776 rtx
5777 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5778                        machine_mode mode, int unsignedp, int normalizep)
5779 {
5780   rtx tem;
5781   rtx_code_label *label;
5782   rtx trueval, falseval;
5783
5784   /* First see if emit_store_flag can do the job.  */
5785   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5786   if (tem != 0)
5787     return tem;
5788
5789   if (!target)
5790     target = gen_reg_rtx (word_mode);
5791
5792   /* If this failed, we have to do this with set/compare/jump/set code.
5793      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5794   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5795   if (code == NE
5796       && GET_MODE_CLASS (mode) == MODE_INT
5797       && REG_P (target)
5798       && op0 == target
5799       && op1 == const0_rtx)
5800     {
5801       label = gen_label_rtx ();
5802       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5803                                mode, NULL_RTX, NULL_RTX, label, -1);
5804       emit_move_insn (target, trueval);
5805       emit_label (label);
5806       return target;
5807     }
5808
5809   if (!REG_P (target)
5810       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5811     target = gen_reg_rtx (GET_MODE (target));
5812
5813   /* Jump in the right direction if the target cannot implement CODE
5814      but can jump on its reverse condition.  */
5815   falseval = const0_rtx;
5816   if (! can_compare_p (code, mode, ccp_jump)
5817       && (! FLOAT_MODE_P (mode)
5818           || code == ORDERED || code == UNORDERED
5819           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5820           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5821     {
5822       enum rtx_code rcode;
5823       if (FLOAT_MODE_P (mode))
5824         rcode = reverse_condition_maybe_unordered (code);
5825       else
5826         rcode = reverse_condition (code);
5827
5828       /* Canonicalize to UNORDERED for the libcall.  */
5829       if (can_compare_p (rcode, mode, ccp_jump)
5830           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5831         {
5832           falseval = trueval;
5833           trueval = const0_rtx;
5834           code = rcode;
5835         }
5836     }
5837
5838   emit_move_insn (target, trueval);
5839   label = gen_label_rtx ();
5840   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5841                            NULL_RTX, label, -1);
5842
5843   emit_move_insn (target, falseval);
5844   emit_label (label);
5845
5846   return target;
5847 }
5848 \f
5849 /* Perform possibly multi-word comparison and conditional jump to LABEL
5850    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5851    now a thin wrapper around do_compare_rtx_and_jump.  */
5852
5853 static void
5854 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5855                  rtx_code_label *label)
5856 {
5857   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5858   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5859                            NULL_RTX, NULL_RTX, label, -1);
5860 }