gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2015 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "diagnostic-core.h"
  27 #include "rtl.h"
  28 #include "hash-set.h"
  29 #include "machmode.h"
  30 #include "vec.h"
  31 #include "double-int.h"
  32 #include "input.h"
  33 #include "alias.h"
  34 #include "symtab.h"
  35 #include "wide-int.h"
  36 #include "inchash.h"
  37 #include "tree.h"
  38 #include "fold-const.h"
  39 #include "stor-layout.h"
  40 #include "tm_p.h"
  41 #include "flags.h"
  42 #include "insn-config.h"
  43 #include "hashtab.h"
  44 #include "hard-reg-set.h"
  45 #include "function.h"
  46 #include "statistics.h"
  47 #include "real.h"
  48 #include "fixed-value.h"
  49 #include "expmed.h"
  50 #include "dojump.h"
  51 #include "explow.h"
  52 #include "calls.h"
  53 #include "emit-rtl.h"
  54 #include "varasm.h"
  55 #include "stmt.h"
  56 #include "expr.h"
  57 #include "insn-codes.h"
  58 #include "optabs.h"
  59 #include "recog.h"
  60 #include "langhooks.h"
  61 #include "predict.h"
  62 #include "basic-block.h"
  63 #include "df.h"
  64 #include "target.h"
  65
     /* Default instance of the target-specific state used by this file.  */
  66 struct target_expmed default_target_expmed;
  67 #if SWITCHABLE_TARGET
     /* Only defined when SWITCHABLE_TARGET is set; initially points at the
        default instance above.  NOTE(review): presumably retargeted when the
        active target changes -- confirm against expmed.h.  */
  68 struct target_expmed *this_target_expmed = &default_target_expmed;
  69 #endif
  70
  71 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  72                                    unsigned HOST_WIDE_INT,
  73                                    unsigned HOST_WIDE_INT,
  74                                    unsigned HOST_WIDE_INT,
  75                                    rtx);
  76 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
  77                                      unsigned HOST_WIDE_INT,
  78                                      rtx);
  79 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  80                                    unsigned HOST_WIDE_INT,
  81                                    unsigned HOST_WIDE_INT,
  82                                    unsigned HOST_WIDE_INT,
  83                                    rtx);
  84 static rtx extract_fixed_bit_field (machine_mode, rtx,
  85                                     unsigned HOST_WIDE_INT,
  86                                     unsigned HOST_WIDE_INT, rtx, int);
  87 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
  88                                       unsigned HOST_WIDE_INT,
  89                                       unsigned HOST_WIDE_INT, rtx, int);
  90 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
  91 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  92                                     unsigned HOST_WIDE_INT, int);
  93 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
  94 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  95 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  96
  97 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  98    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  99    The mask is truncated if necessary to the width of mode MODE.  The
 100    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
 101
 102 static inline rtx
 103 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
 104 {
 105   return immed_wide_int_const
 106     (wi::shifted_mask (bitpos, bitsize, complement,
 107                        GET_MODE_PRECISION (mode)), mode);
 108 }
 109
 110 /* Test whether a value is zero or a power of two.  Note that X is
        expanded twice, so it should not have side effects.  */
 111 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
 112   (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
 113
     /* Scratch RTL expressions that init_expmed builds once (initially in
        QImode) and that init_expmed_one_mode / init_expmed_one_conv re-mode
        with PUT_MODE in order to query the cost of each operation.  The
        expressions share sub-expressions, so changing the mode or an operand
        of one member affects every member built on top of it.  */
 114 struct init_expmed_rtl
 115 {
 116   rtx reg;         /* REG numbered LAST_VIRTUAL_REGISTER + 1.  */
 117   rtx plus;        /* (plus reg reg) */
 118   rtx neg;         /* (neg reg) */
 119   rtx mult;        /* (mult reg reg) */
 120   rtx sdiv;        /* (div reg reg) */
 121   rtx udiv;        /* (udiv reg reg) */
 122   rtx sdiv_32;     /* (div reg pow2[5]), i.e. signed division by 32.  */
 123   rtx smod_32;     /* (mod reg pow2[5]), i.e. signed modulus by 32.  */
 124   rtx wide_mult;   /* (mult zext zext) */
 125   rtx wide_lshr;   /* (lshiftrt wide_mult N); N is set per mode.  */
 126   rtx wide_trunc;  /* (truncate wide_lshr) */
 127   rtx shift;       /* (ashift reg N); N is set to cint[m] per count.  */
 128   rtx shift_mult;  /* (mult reg N); N is set to pow2[m] per count.  */
 129   rtx shift_add;   /* (plus shift_mult reg) */
 130   rtx shift_sub0;  /* (minus shift_mult reg) */
 131   rtx shift_sub1;  /* (minus reg shift_mult) */
 132   rtx zext;        /* (zero_extend reg) */
 133   rtx trunc;       /* (truncate reg) */
 134
     /* Constants indexed by shift count M.  init_expmed fills entries
        1 .. MAX_BITS_PER_WORD - 1; entry 0 is left null.  */
 135   rtx pow2[MAX_BITS_PER_WORD];  /* pow2[M] is the constant 1 << M.  */
 136   rtx cint[MAX_BITS_PER_WORD];  /* cint[M] is the constant M.  */
 137 };
 138
 139 static void
 140 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
 141                       machine_mode from_mode, bool speed)
 142 {
 143   int to_size, from_size;
 144   rtx which;
 145
 146   to_size = GET_MODE_PRECISION (to_mode);
 147   from_size = GET_MODE_PRECISION (from_mode);
 148
 149   /* Most partial integers have a precision less than the "full"
 150      integer it requires for storage.  In case one doesn't, for
 151      comparison purposes here, reduce the bit size by one in that
 152      case.  */
 153   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 154       && exact_log2 (to_size) != -1)
 155     to_size --;
 156   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 157       && exact_log2 (from_size) != -1)
 158     from_size --;
 159
 160   /* Assume cost of zero-extend and sign-extend is the same.  */
 161   which = (to_size < from_size ? all->trunc : all->zext);
 162
 163   PUT_MODE (all->reg, from_mode);
 164   set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
 165 }
 166
 167 static void
 168 init_expmed_one_mode (struct init_expmed_rtl *all,
 169                       machine_mode mode, int speed)
 170 {
 171   int m, n, mode_bitsize;
 172   machine_mode mode_from;
 173
 174   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 175
 176   PUT_MODE (all->reg, mode);
 177   PUT_MODE (all->plus, mode);
 178   PUT_MODE (all->neg, mode);
 179   PUT_MODE (all->mult, mode);
 180   PUT_MODE (all->sdiv, mode);
 181   PUT_MODE (all->udiv, mode);
 182   PUT_MODE (all->sdiv_32, mode);
 183   PUT_MODE (all->smod_32, mode);
 184   PUT_MODE (all->wide_trunc, mode);
 185   PUT_MODE (all->shift, mode);
 186   PUT_MODE (all->shift_mult, mode);
 187   PUT_MODE (all->shift_add, mode);
 188   PUT_MODE (all->shift_sub0, mode);
 189   PUT_MODE (all->shift_sub1, mode);
 190   PUT_MODE (all->zext, mode);
 191   PUT_MODE (all->trunc, mode);
 192
 193   set_add_cost (speed, mode, set_src_cost (all->plus, speed));
 194   set_neg_cost (speed, mode, set_src_cost (all->neg, speed));
 195   set_mul_cost (speed, mode, set_src_cost (all->mult, speed));
 196   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, speed));
 197   set_udiv_cost (speed, mode, set_src_cost (all->udiv, speed));
 198
 199   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, speed)
 200                                      <= 2 * add_cost (speed, mode)));
 201   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, speed)
 202                                      <= 4 * add_cost (speed, mode)));
 203
 204   set_shift_cost (speed, mode, 0, 0);
 205   {
 206     int cost = add_cost (speed, mode);
 207     set_shiftadd_cost (speed, mode, 0, cost);
 208     set_shiftsub0_cost (speed, mode, 0, cost);
 209     set_shiftsub1_cost (speed, mode, 0, cost);
 210   }
 211
 212   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 213   for (m = 1; m < n; m++)
 214     {
 215       XEXP (all->shift, 1) = all->cint[m];
 216       XEXP (all->shift_mult, 1) = all->pow2[m];
 217
 218       set_shift_cost (speed, mode, m, set_src_cost (all->shift, speed));
 219       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, speed));
 220       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, speed));
 221       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, speed));
 222     }
 223
 224   if (SCALAR_INT_MODE_P (mode))
 225     {
 226       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 227            mode_from = (machine_mode)(mode_from + 1))
 228         init_expmed_one_conv (all, mode, mode_from, speed);
 229     }
 230   if (GET_MODE_CLASS (mode) == MODE_INT)
 231     {
 232       machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
 233       if (wider_mode != VOIDmode)
 234         {
 235           PUT_MODE (all->zext, wider_mode);
 236           PUT_MODE (all->wide_mult, wider_mode);
 237           PUT_MODE (all->wide_lshr, wider_mode);
 238           XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
 239
 240           set_mul_widen_cost (speed, wider_mode,
 241                               set_src_cost (all->wide_mult, speed));
 242           set_mul_highpart_cost (speed, mode,
 243                                  set_src_cost (all->wide_trunc, speed));
 244         }
 245     }
 246 }
 247
 248 void
 249 init_expmed (void)
 250 {
 251   struct init_expmed_rtl all;
 252   machine_mode mode = QImode;
 253   int m, speed;
 254
 255   memset (&all, 0, sizeof all);
 256   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 257     {
 258       all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 259       all.cint[m] = GEN_INT (m);
 260     }
 261
 262   /* Avoid using hard regs in ways which may be unsupported.  */
 263   all.reg = gen_rtx_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 264   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 265   all.neg = gen_rtx_NEG (mode, all.reg);
 266   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 267   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 268   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 269   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 270   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 271   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 272   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 273   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 274   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 275   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 276   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 277   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 278   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 279   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 280   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 281
 282   for (speed = 0; speed < 2; speed++)
 283     {
 284       crtl->maybe_hot_insn_p = speed;
 285       set_zero_cost (speed, set_src_cost (const0_rtx, speed));
 286
 287       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 288            mode = (machine_mode)(mode + 1))
 289         init_expmed_one_mode (&all, mode, speed);
 290
 291       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 292         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 293              mode = (machine_mode)(mode + 1))
 294           init_expmed_one_mode (&all, mode, speed);
 295
 296       if (MIN_MODE_VECTOR_INT != VOIDmode)
 297         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 298              mode = (machine_mode)(mode + 1))
 299           init_expmed_one_mode (&all, mode, speed);
 300     }
 301
 302   if (alg_hash_used_p ())
 303     {
 304       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 305       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 306     }
 307   else
 308     set_alg_hash_used_p (true);
 309   default_rtl_profile ();
 310
 311   ggc_free (all.trunc);
 312   ggc_free (all.shift_sub1);
 313   ggc_free (all.shift_sub0);
 314   ggc_free (all.shift_add);
 315   ggc_free (all.shift_mult);
 316   ggc_free (all.shift);
 317   ggc_free (all.wide_trunc);
 318   ggc_free (all.wide_lshr);
 319   ggc_free (all.wide_mult);
 320   ggc_free (all.zext);
 321   ggc_free (all.smod_32);
 322   ggc_free (all.sdiv_32);
 323   ggc_free (all.udiv);
 324   ggc_free (all.sdiv);
 325   ggc_free (all.mult);
 326   ggc_free (all.neg);
 327   ggc_free (all.plus);
 328   ggc_free (all.reg);
 329 }
 330
 331 /* Return an rtx representing minus the value of X.
 332    MODE is the intended mode of the result,
 333    useful if X is a CONST_INT.  */
 334
 335 rtx
 336 negate_rtx (machine_mode mode, rtx x)
 337 {
 338   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 339
 340   if (result == 0)
 341     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 342
 343   return result;
 344 }
 345
 346 /* Adjust bitfield memory MEM so that it points to the first unit of mode
 347    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
 348    If MODE is BLKmode, return a reference to every byte in the bitfield.
 349    Set *NEW_BITNUM to the bit position of the field within the new memory.  */
 350
 351 static rtx
 352 narrow_bit_field_mem (rtx mem, machine_mode mode,
 353                       unsigned HOST_WIDE_INT bitsize,
 354                       unsigned HOST_WIDE_INT bitnum,
 355                       unsigned HOST_WIDE_INT *new_bitnum)
 356 {
 357   if (mode == BLKmode)
 358     {
 359       *new_bitnum = bitnum % BITS_PER_UNIT;
 360       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 361       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 362                             / BITS_PER_UNIT);
 363       return adjust_bitfield_address_size (mem, mode, offset, size);
 364     }
 365   else
 366     {
 367       unsigned int unit = GET_MODE_BITSIZE (mode);
 368       *new_bitnum = bitnum % unit;
 369       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 370       return adjust_bitfield_address (mem, mode, offset);
 371     }
 372 }
 373
 374 /* The caller wants to perform insertion or extraction PATTERN on a
 375    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 376    BITREGION_START and BITREGION_END are as for store_bit_field
 377    and FIELDMODE is the natural mode of the field.
 378
 379    Search for a mode that is compatible with the memory access
 380    restrictions and (where applicable) with a register insertion or
 381    extraction.  Return the new memory on success, storing the adjusted
 382    bit position in *NEW_BITNUM.  Return null otherwise.  */
 383
 384 static rtx
 385 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 386                               rtx op0, HOST_WIDE_INT bitsize,
 387                               HOST_WIDE_INT bitnum,
 388                               unsigned HOST_WIDE_INT bitregion_start,
 389                               unsigned HOST_WIDE_INT bitregion_end,
 390                               machine_mode fieldmode,
 391                               unsigned HOST_WIDE_INT *new_bitnum)
 392 {
 393   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 394                                 bitregion_end, MEM_ALIGN (op0),
 395                                 MEM_VOLATILE_P (op0));
 396   machine_mode best_mode;
 397   if (iter.next_mode (&best_mode))
 398     {
 399       /* We can use a memory in BEST_MODE.  See whether this is true for
 400          any wider modes.  All other things being equal, we prefer to
 401          use the widest mode possible because it tends to expose more
 402          CSE opportunities.  */
 403       if (!iter.prefer_smaller_modes ())
 404         {
 405           /* Limit the search to the mode required by the corresponding
 406              register insertion or extraction instruction, if any.  */
 407           machine_mode limit_mode = word_mode;
 408           extraction_insn insn;
 409           if (get_best_reg_extraction_insn (&insn, pattern,
 410                                             GET_MODE_BITSIZE (best_mode),
 411                                             fieldmode))
 412             limit_mode = insn.field_mode;
 413
 414           machine_mode wider_mode;
 415           while (iter.next_mode (&wider_mode)
 416                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 417             best_mode = wider_mode;
 418         }
 419       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 420                                    new_bitnum);
 421     }
 422   return NULL_RTX;
 423 }
 424
 425 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 426    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 427    offset is then BITNUM / BITS_PER_UNIT.  */
 428
 429 static bool
 430 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
 431                      unsigned HOST_WIDE_INT bitsize,
 432                      machine_mode struct_mode)
 433 {
 434   if (BYTES_BIG_ENDIAN)
 435     return (bitnum % BITS_PER_UNIT == 0
 436             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
 437                 || (bitnum + bitsize) % BITS_PER_WORD == 0));
 438   else
 439     return bitnum % BITS_PER_WORD == 0;
 440 }
 441
 442 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 443    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 444    Return false if the access would touch memory outside the range
 445    BITREGION_START to BITREGION_END for conformance to the C++ memory
 446    model.  */
 447
 448 static bool
 449 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 450                             unsigned HOST_WIDE_INT bitnum,
 451                             machine_mode fieldmode,
 452                             unsigned HOST_WIDE_INT bitregion_start,
 453                             unsigned HOST_WIDE_INT bitregion_end)
 454 {
 455   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 456
 457   /* -fstrict-volatile-bitfields must be enabled and we must have a
 458      volatile MEM.  */
 459   if (!MEM_P (op0)
 460       || !MEM_VOLATILE_P (op0)
 461       || flag_strict_volatile_bitfields <= 0)
 462     return false;
 463
 464   /* Non-integral modes likely only happen with packed structures.
 465      Punt.  */
 466   if (!SCALAR_INT_MODE_P (fieldmode))
 467     return false;
 468
 469   /* The bit size must not be larger than the field mode, and
 470      the field mode must not be larger than a word.  */
 471   if (bitsize > modesize || modesize > BITS_PER_WORD)
 472     return false;
 473
 474   /* Check for cases of unaligned fields that must be split.  */
 475   if (bitnum % BITS_PER_UNIT + bitsize > modesize
 476       || (STRICT_ALIGNMENT
 477           && bitnum % GET_MODE_ALIGNMENT (fieldmode) + bitsize > modesize))
 478     return false;
 479
 480   /* Check for cases where the C++ memory model applies.  */
 481   if (bitregion_end != 0
 482       && (bitnum - bitnum % modesize < bitregion_start
 483           || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
 484     return false;
 485
 486   return true;
 487 }
 488
 489 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 490    bit number BITNUM can be treated as a simple value of mode MODE.  */
 491
 492 static bool
 493 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 494                        unsigned HOST_WIDE_INT bitnum, machine_mode mode)
 495 {
 496   return (MEM_P (op0)
 497           && bitnum % BITS_PER_UNIT == 0
 498           && bitsize == GET_MODE_BITSIZE (mode)
 499           && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
 500               || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
 501                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 502 }
 503 \f
 504 /* Try to use instruction INSV to store VALUE into a field of OP0.
 505    BITSIZE and BITNUM are as for store_bit_field.
        Return true if the insertion was emitted.  Return false if BITSIZE
        is zero or wider than INSV's field mode, or if INSV could not be
        expanded; in the latter case every insn emitted by this function is
        deleted again.  */
 506
 507 static bool
 508 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 509                             unsigned HOST_WIDE_INT bitsize,
 510                             unsigned HOST_WIDE_INT bitnum,
 511                             rtx value)
 512 {
 513   struct expand_operand ops[4];
 514   rtx value1;
 515   rtx xop0 = op0;
 516   rtx_insn *last = get_last_insn ();  /* Rollback point on failure.  */
 517   bool copy_back = false;  /* True if XOP0 is a temporary for OP0.  */
 518
 519   machine_mode op_mode = insv->field_mode;
 520   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 521   if (bitsize == 0 || bitsize > unit)
 522     return false;
 523
 524   if (MEM_P (xop0))
 525     /* Narrow the MEM to INSV's struct_mode and rebase BITNUM onto it.  */
 526     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 527                                  &bitnum);
 528   else
 529     {
 530       /* Convert from counting within OP0 to counting in OP_MODE.  */
 531       if (BYTES_BIG_ENDIAN)
 532         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 533
 534       /* If xop0 is a register, we need it in OP_MODE
 535          to make it acceptable to the format of insv.  */
 536       if (GET_CODE (xop0) == SUBREG)
 537         /* We can't just change the mode, because this might clobber op0,
 538            and we will need the original value of op0 if insv fails.  */
 539         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 540       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 541         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 542     }
 543
 544   /* If the destination is a paradoxical subreg such that we need a
 545      truncate to the inner mode, perform the insertion on a temporary and
 546      truncate the result to the original destination.  Note that we can't
 547      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 548      X) 0)) is (reg:N X).  */
 549   if (GET_CODE (xop0) == SUBREG
 550       && REG_P (SUBREG_REG (xop0))
 551       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 552                                          op_mode))
 553     {
 554       rtx tem = gen_reg_rtx (op_mode);
 555       emit_move_insn (tem, xop0);
 556       xop0 = tem;
 557       copy_back = true;
 558     }
 559
 560   /* There is a similar overflow check at the start of store_bit_field_1,
 561      but it only handles the case where the field lies completely outside
 562      the register.  The field can also lie partially outside the register,
 563      in which case BITSIZE must be reduced to the part that fits.  Without
 564      this adjustment, pr48335-2.c fails on big-endian targets that provide
 565      a bit-insert instruction, such as arm and aarch64.  A -Wextra warning
 566      reports the truncation.  */
 567   if (bitsize + bitnum > unit && bitnum < unit)
 568     {
 569       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
 570                "destination object, data truncated into %wu-bit",
 571                bitsize, unit - bitnum);
 572       bitsize = unit - bitnum;
 573     }
 574
 575   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 576      "backwards" from the size of the unit we are inserting into.
 577      Otherwise, we count bits from the most significant on a
 578      BYTES/BITS_BIG_ENDIAN machine.  */
 579
 580   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 581     bitnum = unit - bitsize - bitnum;
 582
 583   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 584   value1 = value;
 585   if (GET_MODE (value) != op_mode)
 586     {
 587       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 588         {
 589           /* Optimization: Don't bother really extending VALUE
 590              if it has all the bits we will actually use.  However,
 591              if we must narrow it, be sure we do it correctly.  */
 592
 593           if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 594             {
 595               rtx tmp;
 596
                   /* VALUE is narrower than OP_MODE: wrap it in an OP_MODE
                      subreg, forcing it into a register first if the subreg
                      cannot be formed directly.  */
 597               tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 598               if (! tmp)
 599                 tmp = simplify_gen_subreg (op_mode,
 600                                            force_reg (GET_MODE (value),
 601                                                       value1),
 602                                            GET_MODE (value), 0);
 603               value1 = tmp;
 604             }
 605           else
 606             value1 = gen_lowpart (op_mode, value1);
 607         }
 608       else if (CONST_INT_P (value))
 609         value1 = gen_int_mode (INTVAL (value), op_mode);
 610       else
 611         /* Parse phase is supposed to make VALUE's data type
 612            match that of the component reference, which is a type
 613            at least as wide as the field; so VALUE should have
 614            a mode that corresponds to that type.  */
 615         gcc_assert (CONSTANT_P (value));
 616     }
 617
       /* Operands of INSV: destination, field width, field position, value.  */
 618   create_fixed_operand (&ops[0], xop0);
 619   create_integer_operand (&ops[1], bitsize);
 620   create_integer_operand (&ops[2], bitnum);
 621   create_input_operand (&ops[3], value1, op_mode);
 622   if (maybe_expand_insn (insv->icode, 4, ops))
 623     {
           /* The insertion was done on a temporary; copy it back to OP0.  */
 624       if (copy_back)
 625         convert_move (op0, xop0, true);
 626       return true;
 627     }
       /* INSV failed: discard everything emitted since entry.  */
 628   delete_insns_since (last);
 629   return false;
 630 }
 631
 632 /* A subroutine of store_bit_field, with the same arguments.  Return true
 633    if the operation could be implemented.
 634
 635    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 636    no other way of implementing the operation.  If FALLBACK_P is false,
 637    return false instead.  */
 638
 639 static bool
 640 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 641                    unsigned HOST_WIDE_INT bitnum,
 642                    unsigned HOST_WIDE_INT bitregion_start,
 643                    unsigned HOST_WIDE_INT bitregion_end,
 644                    machine_mode fieldmode,
 645                    rtx value, bool fallback_p)
 646 {
 647   rtx op0 = str_rtx;
 648   rtx orig_value;
 649
 650   while (GET_CODE (op0) == SUBREG)
 651     {
 652       /* The following line once was done only if WORDS_BIG_ENDIAN,
 653          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 654          meaningful at a much higher level; when structures are copied
 655          between memory and regs, the higher-numbered regs
 656          always get higher addresses.  */
 657       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 658       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 659       int byte_offset = 0;
 660
 661       /* Paradoxical subregs need special handling on big endian machines.  */
 662       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 663         {
 664           int difference = inner_mode_size - outer_mode_size;
 665
 666           if (WORDS_BIG_ENDIAN)
 667             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 668           if (BYTES_BIG_ENDIAN)
 669             byte_offset += difference % UNITS_PER_WORD;
 670         }
 671       else
 672         byte_offset = SUBREG_BYTE (op0);
 673
 674       bitnum += byte_offset * BITS_PER_UNIT;
 675       op0 = SUBREG_REG (op0);
 676     }
 677
 678   /* No action is needed if the target is a register and if the field
 679      lies completely outside that register.  This can occur if the source
 680      code contains an out-of-bounds access to a small array.  */
 681   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 682     return true;
 683
 684   /* Use vec_set patterns for inserting parts of vectors whenever
 685      available.  */
 686   if (VECTOR_MODE_P (GET_MODE (op0))
 687       && !MEM_P (op0)
 688       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 689       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 690       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 691       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 692     {
 693       struct expand_operand ops[3];
 694       machine_mode outermode = GET_MODE (op0);
 695       machine_mode innermode = GET_MODE_INNER (outermode);
 696       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 697       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 698
 699       create_fixed_operand (&ops[0], op0);
 700       create_input_operand (&ops[1], value, innermode);
 701       create_integer_operand (&ops[2], pos);
 702       if (maybe_expand_insn (icode, 3, ops))
 703         return true;
 704     }
 705
 706   /* If the target is a register, overwriting the entire object, or storing
 707      a full-word or multi-word field can be done with just a SUBREG.  */
 708   if (!MEM_P (op0)
 709       && bitsize == GET_MODE_BITSIZE (fieldmode)
 710       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 711           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 712     {
 713       /* Use the subreg machinery either to narrow OP0 to the required
 714          words or to cope with mode punning between equal-sized modes.
 715          In the latter case, use subreg on the rhs side, not lhs.  */
 716       rtx sub;
 717
 718       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 719         {
 720           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 721           if (sub)
 722             {
 723               emit_move_insn (op0, sub);
 724               return true;
 725             }
 726         }
 727       else
 728         {
 729           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 730                                      bitnum / BITS_PER_UNIT);
 731           if (sub)
 732             {
 733               emit_move_insn (sub, value);
 734               return true;
 735             }
 736         }
 737     }
 738
 739   /* If the target is memory, storing any naturally aligned field can be
 740      done with a simple store.  For targets that support fast unaligned
 741      memory, any naturally sized, unit aligned field can be done directly.  */
 742   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
 743     {
 744       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 745       emit_move_insn (op0, value);
 746       return true;
 747     }
 748
 749   /* Make sure we are playing with integral modes.  Pun with subregs
 750      if we aren't.  This must come after the entire register case above,
 751      since that case is valid for any mode.  The following cases are only
 752      valid for integral modes.  */
 753   {
 754     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 755     if (imode != GET_MODE (op0))
 756       {
 757         if (MEM_P (op0))
 758           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
 759         else
 760           {
 761             gcc_assert (imode != BLKmode);
 762             op0 = gen_lowpart (imode, op0);
 763           }
 764       }
 765   }
 766
 767   /* Storing an lsb-aligned field in a register
 768      can be done with a movstrict instruction.  */
 769
 770   if (!MEM_P (op0)
 771       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 772       && bitsize == GET_MODE_BITSIZE (fieldmode)
 773       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 774     {
 775       struct expand_operand ops[2];
 776       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 777       rtx arg0 = op0;
 778       unsigned HOST_WIDE_INT subreg_off;
 779
 780       if (GET_CODE (arg0) == SUBREG)
 781         {
 782           /* Else we've got some float mode source being extracted into
 783              a different float mode destination -- this combination of
 784              subregs results in Severe Tire Damage.  */
 785           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 786                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 787                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 788           arg0 = SUBREG_REG (arg0);
 789         }
 790
 791       subreg_off = bitnum / BITS_PER_UNIT;
 792       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 793         {
 794           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 795
 796           create_fixed_operand (&ops[0], arg0);
 797           /* Shrink the source operand to FIELDMODE.  */
 798           create_convert_operand_to (&ops[1], value, fieldmode, false);
 799           if (maybe_expand_insn (icode, 2, ops))
 800             return true;
 801         }
 802     }
 803
 804   /* Handle fields bigger than a word.  */
 805
 806   if (bitsize > BITS_PER_WORD)
 807     {
 808       /* Here we transfer the words of the field
 809          in the order least significant first.
 810          This is because the most significant word is the one which may
 811          be less than full.
 812          However, only do that if the value is not BLKmode.  */
 813
 814       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 815       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 816       unsigned int i;
 817       rtx_insn *last;
 818
 819       /* This is the mode we must force value to, so that there will be enough
 820          subwords to extract.  Note that fieldmode will often (always?) be
 821          VOIDmode, because that is what store_field uses to indicate that this
 822          is a bit field, but passing VOIDmode to operand_subword_force
 823          is not allowed.  */
 824       fieldmode = GET_MODE (value);
 825       if (fieldmode == VOIDmode)
 826         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 827
 828       last = get_last_insn ();
 829       for (i = 0; i < nwords; i++)
 830         {
 831           /* If I is 0, use the low-order word in both field and target;
 832              if I is 1, use the next to lowest word; and so on.  */
 833           unsigned int wordnum = (backwards
 834                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 835                                   - i - 1
 836                                   : i);
 837           unsigned int bit_offset = (backwards
 838                                      ? MAX ((int) bitsize - ((int) i + 1)
 839                                             * BITS_PER_WORD,
 840                                             0)
 841                                      : (int) i * BITS_PER_WORD);
 842           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 843           unsigned HOST_WIDE_INT new_bitsize =
 844             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 845
 846           /* If the remaining chunk doesn't have full wordsize we have
 847              to make sure that for big endian machines the higher order
 848              bits are used.  */
 849           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 850             value_word = simplify_expand_binop (word_mode, lshr_optab,
 851                                                 value_word,
 852                                                 GEN_INT (BITS_PER_WORD
 853                                                          - new_bitsize),
 854                                                 NULL_RTX, true,
 855                                                 OPTAB_LIB_WIDEN);
 856
 857           if (!store_bit_field_1 (op0, new_bitsize,
 858                                   bitnum + bit_offset,
 859                                   bitregion_start, bitregion_end,
 860                                   word_mode,
 861                                   value_word, fallback_p))
 862             {
 863               delete_insns_since (last);
 864               return false;
 865             }
 866         }
 867       return true;
 868     }
 869
 870   /* If VALUE has a floating-point or complex mode, access it as an
 871      integer of the corresponding size.  This can occur on a machine
 872      with 64 bit registers that uses SFmode for float.  It can also
 873      occur for unaligned float or complex fields.  */
 874   orig_value = value;
 875   if (GET_MODE (value) != VOIDmode
 876       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 877       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 878     {
 879       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 880       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 881     }
 882
 883   /* If OP0 is a multi-word register, narrow it to the affected word.
 884      If the region spans two words, defer to store_split_bit_field.  */
 885   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 886     {
 887       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 888                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 889       gcc_assert (op0);
 890       bitnum %= BITS_PER_WORD;
 891       if (bitnum + bitsize > BITS_PER_WORD)
 892         {
 893           if (!fallback_p)
 894             return false;
 895
 896           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 897                                  bitregion_end, value);
 898           return true;
 899         }
 900     }
 901
 902   /* From here on we can assume that the field to be stored in fits
 903      within a word.  If the destination is a register, it too fits
 904      in a word.  */
 905
 906   extraction_insn insv;
 907   if (!MEM_P (op0)
 908       && get_best_reg_extraction_insn (&insv, EP_insv,
 909                                        GET_MODE_BITSIZE (GET_MODE (op0)),
 910                                        fieldmode)
 911       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 912     return true;
 913
 914   /* If OP0 is a memory, try copying it to a register and seeing if a
 915      cheap register alternative is available.  */
 916   if (MEM_P (op0))
 917     {
 918       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
 919                                         fieldmode)
 920           && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 921         return true;
 922
 923       rtx_insn *last = get_last_insn ();
 924
 925       /* Try loading part of OP0 into a register, inserting the bitfield
 926          into that, and then copying the result back to OP0.  */
 927       unsigned HOST_WIDE_INT bitpos;
 928       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
 929                                                bitregion_start, bitregion_end,
 930                                                fieldmode, &bitpos);
 931       if (xop0)
 932         {
 933           rtx tempreg = copy_to_reg (xop0);
 934           if (store_bit_field_1 (tempreg, bitsize, bitpos,
 935                                  bitregion_start, bitregion_end,
 936                                  fieldmode, orig_value, false))
 937             {
 938               emit_move_insn (xop0, tempreg);
 939               return true;
 940             }
 941           delete_insns_since (last);
 942         }
 943     }
 944
 945   if (!fallback_p)
 946     return false;
 947
 948   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
 949                          bitregion_end, value);
 950   return true;
 951 }
 952
 953 /* Generate code to store value from rtx VALUE
 954    into a bit-field within structure STR_RTX
 955    containing BITSIZE bits starting at bit BITNUM.
 956
 957    BITREGION_START is bitpos of the first bitfield in this region.
 958    BITREGION_END is the bitpos of the ending bitfield in this region.
 959    These two fields are 0, if the C++ memory model does not apply,
 960    or we are not interested in keeping track of bitfield regions.
 961
 962    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.

        The store is attempted in this order:
        - if strict_volatile_bitfield_p accepts the access, it is done
          either as a plain move or by store_fixed_bit_field_1 on a
          reference narrowed to FIELDMODE, and nothing else is tried;
        - otherwise, for a MEM with a nonzero BITREGION_START, STR_RTX and
          the bit positions are first rebased to the start of the region;
        - finally store_bit_field_1 is called with its fallback enabled.  */
 963
 964 void
 965 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 966                  unsigned HOST_WIDE_INT bitnum,
 967                  unsigned HOST_WIDE_INT bitregion_start,
 968                  unsigned HOST_WIDE_INT bitregion_end,
 969                  machine_mode fieldmode,
 970                  rtx value)
 971 {
 972   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
 973   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
 974                                   bitregion_start, bitregion_end))
 975     {
 976       /* Storing any naturally aligned field can be done with a simple
 977          store.  For targets that support fast unaligned memory, any
 978          naturally sized, unit aligned field can be done directly.  */
 979       if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, fieldmode))
 980         {
 981           str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
 982                                              bitnum / BITS_PER_UNIT);
 983           emit_move_insn (str_rtx, value);
 984         }
 985       else
 986         {
               /* narrow_bit_field_mem also rewrites BITNUM (passed by
                  address) so that it is relative to the narrowed
                  reference.  */
 987           str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
 988                                           &bitnum);
 989           /* Explicitly override the C/C++ memory model; ignore the
 990              bit range so that we can do the access in the mode mandated
 991              by -fstrict-volatile-bitfields instead.  */
 992           store_fixed_bit_field_1 (str_rtx, bitsize, bitnum, value);
 993         }
 994
           /* The strict-volatile case is fully handled here; none of the
              generic strategies below are attempted.  */
 995       return;
 996     }
 997
 998   /* Under the C++11 memory model, we must not touch bits outside the
 999      bit region.  Adjust the address to start at the beginning of the
1000      bit region.  */
1001   if (MEM_P (str_rtx) && bitregion_start > 0)
1002     {
1003       machine_mode bestmode;
1004       HOST_WIDE_INT offset, size;
1005
           /* The region must begin on a byte boundary, since it is turned
              into a byte offset below.  */
1006       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1007
           /* OFFSET is the byte offset of the region start.  After the
              rebasing below, BITNUM and BITREGION_END count from the region
              start, BITREGION_START is 0, and SIZE is the number of bytes
              from the region start to the end of the field, rounded up.  */
1008       offset = bitregion_start / BITS_PER_UNIT;
1009       bitnum -= bitregion_start;
1010       size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1011       bitregion_end -= bitregion_start;
1012       bitregion_start = 0;
           /* NOTE(review): VOIDmode is passed in the position where
              store_fixed_bit_field passes its largest allowed mode;
              presumably it means "no upper limit" here -- confirm against
              get_best_mode.  */
1013       bestmode = get_best_mode (bitsize, bitnum,
1014                                 bitregion_start, bitregion_end,
1015                                 MEM_ALIGN (str_rtx), VOIDmode,
1016                                 MEM_VOLATILE_P (str_rtx));
1017       str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1018     }
1019
       /* The final argument enables the fallback in store_bit_field_1, so
          a false return is treated as impossible.  */
1020   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1021                           bitregion_start, bitregion_end,
1022                           fieldmode, value, true))
1023     gcc_unreachable ();
1024 }
1025 \f
1026 /* Use shifts and boolean operations to store VALUE into a bit field of
1027    width BITSIZE in OP0, starting at bit BITNUM.

        BITREGION_START and BITREGION_END are handed to get_best_mode when
        choosing the access mode for a MEM, and to store_split_bit_field
        when no single mode can be used.  If OP0 is not a MEM it is passed
        to store_fixed_bit_field_1 unchanged.  */
1028
1029 static void
1030 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1031                        unsigned HOST_WIDE_INT bitnum,
1032                        unsigned HOST_WIDE_INT bitregion_start,
1033                        unsigned HOST_WIDE_INT bitregion_end,
1034                        rtx value)
1035 {
1036   /* There is a case not handled here:
1037      a structure with a known alignment of just a halfword
1038      and a field split across two aligned halfwords within the structure.
1039      Or likewise a structure with a known alignment of just a byte
1040      and a field split across two bytes.
1041      Such cases are not supposed to be able to occur.  */
1042
1043   if (MEM_P (op0))
1044     {
           /* Start from OP0's own mode as the widest mode to consider, but
              use word_mode instead when OP0's mode has zero bit size or is
              wider than a word.  */
1045       machine_mode mode = GET_MODE (op0);
1046       if (GET_MODE_BITSIZE (mode) == 0
1047           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1048         mode = word_mode;
1049       mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1050                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1051
1052       if (mode == VOIDmode)
1053         {
1054           /* The only way this should occur is if the field spans word
1055              boundaries.  */
1056           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1057                                  bitregion_end, value);
1058           return;
1059         }
1060
           /* Re-express OP0 as a MODE-sized reference; BITNUM is updated
              through the pointer to be relative to that reference.  */
1061       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1062     }
1063
1064   store_fixed_bit_field_1 (op0, bitsize, bitnum, value);
1065 }
1066
1067 /* Helper function for store_fixed_bit_field, stores
1068    the bit field always using the MODE of OP0.

        OP0 must have a scalar integer mode (asserted below).  BITSIZE is
        the width of the field and BITNUM its position within OP0, counted
        from the msb when BYTES_BIG_ENDIAN and from the lsb otherwise.
        VALUE is the value to store.

        The new contents are built in registers: the field's bits are
        cleared in a copy of OP0, VALUE (masked and shifted into position)
        is IORed in, and the result is moved back to OP0.  The clearing
        step is skipped when VALUE is a constant with all BITSIZE bits set,
        and the IOR is skipped when VALUE is a constant zero.  */
1069
1070 static void
1071 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1072                          unsigned HOST_WIDE_INT bitnum,
1073                          rtx value)
1074 {
1075   machine_mode mode;
1076   rtx temp;
       /* Set only for a CONST_INT VALUE whose low BITSIZE bits are all
          zero, respectively all one.  */
1077   int all_zero = 0;
1078   int all_one = 0;
1079
1080   mode = GET_MODE (op0);
1081   gcc_assert (SCALAR_INT_MODE_P (mode));
1082
1083   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1084      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1085
1086   if (BYTES_BIG_ENDIAN)
1087     /* BITNUM is the distance between our msb
1088        and that of the containing datum.
1089        Convert it to the distance from the lsb.  */
1090     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1091
1092   /* Now BITNUM is always the distance between our lsb
1093      and that of OP0.  */
1094
1095   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1096      we must first convert its mode to MODE.  */
1097
1098   if (CONST_INT_P (value))
1099     {
1100       unsigned HOST_WIDE_INT v = UINTVAL (value);
1101
           /* Keep only the low BITSIZE bits.  The guard avoids shifting by
              the full width of HOST_WIDE_INT.  */
1102       if (bitsize < HOST_BITS_PER_WIDE_INT)
1103         v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;
1104
1105       if (v == 0)
1106         all_zero = 1;
1107       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1108                 && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
1109                || (bitsize == HOST_BITS_PER_WIDE_INT
1110                    && v == (unsigned HOST_WIDE_INT) -1))
1111         all_one = 1;
1112
           /* The shifted constant is produced directly by lshift_value;
              no shift instruction is emitted on this path.  */
1113       value = lshift_value (mode, v, bitnum);
1114     }
1115   else
1116     {
           /* VALUE needs masking to BITSIZE bits unless its mode is exactly
              BITSIZE bits wide or the field ends at the top bit of MODE.
              This is computed before the conversion below because it looks
              at VALUE's original mode.
              NOTE(review): when the field reaches the top of MODE the left
              shift presumably discards the excess high bits -- confirm.  */
1117       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1118                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1119
1120       if (GET_MODE (value) != mode)
1121         value = convert_to_mode (mode, value, 1);
1122
1123       if (must_and)
1124         value = expand_binop (mode, and_optab, value,
1125                               mask_rtx (mode, 0, bitsize, 0),
1126                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1127       if (bitnum > 0)
1128         value = expand_shift (LSHIFT_EXPR, mode, value,
1129                               bitnum, NULL_RTX, 1);
1130     }
1131
1132   /* Now clear the chosen bits in OP0,
1133      except that if VALUE is -1 we need not bother.  */
1134   /* We keep the intermediates in registers to allow CSE to combine
1135      consecutive bitfield assignments.  */
1136
1137   temp = force_reg (mode, op0);
1138
1139   if (! all_one)
1140     {
           /* NOTE(review): the final argument 1 to mask_rtx presumably
              requests the complemented mask (ones everywhere except the
              field) -- mask_rtx is defined elsewhere; confirm.  */
1141       temp = expand_binop (mode, and_optab, temp,
1142                            mask_rtx (mode, bitnum, bitsize, 1),
1143                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1144       temp = force_reg (mode, temp);
1145     }
1146
1147   /* Now logical-or VALUE into OP0, unless it is zero.  */
1148
1149   if (! all_zero)
1150     {
1151       temp = expand_binop (mode, ior_optab, temp, value,
1152                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1153       temp = force_reg (mode, temp);
1154     }
1155
       /* Write the result back unless TEMP is OP0 itself.  The move is
          made into a copy of OP0's rtx obtained from copy_rtx.  */
1156   if (op0 != temp)
1157     {
1158       op0 = copy_rtx (op0);
1159       emit_move_insn (op0, temp);
1160     }
1161 }
1162 \f
1163 /* Store a bit field that is split across multiple accessible memory objects.
1164
1165    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1166    BITSIZE is the field width; BITPOS the position of its first bit
1167    (within the word).
1168    VALUE is the value to store.

        BITREGION_START and BITREGION_END are passed through to
        store_fixed_bit_field.  In addition, a nonzero BITREGION_END makes
        the loop below shrink its access unit near the end of the region
        when OP0 is neither a REG nor a SUBREG of a REG.

        The field is stored piece by piece: each iteration takes the next
        THISSIZE bits of VALUE and stores them with store_fixed_bit_field
        into the unit of OP0 that holds them.
1169
1170    This does not yet handle fields wider than BITS_PER_WORD.  */
1171
1172 static void
1173 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1174                        unsigned HOST_WIDE_INT bitpos,
1175                        unsigned HOST_WIDE_INT bitregion_start,
1176                        unsigned HOST_WIDE_INT bitregion_end,
1177                        rtx value)
1178 {
       /* UNIT is the size in bits of the pieces of OP0 accessed at a time;
          BITSDONE counts the bits of the field stored so far.  */
1179   unsigned int unit;
1180   unsigned int bitsdone = 0;
1181
1182   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1183      much at a time.  */
1184   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1185     unit = BITS_PER_WORD;
1186   else
1187     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1188
1189   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1190      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1191      again, and we will mutually recurse forever.  */
1192   if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1193     unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1194
1195   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1196      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1197      that VALUE might be a floating-point constant.  */
1198   if (CONSTANT_P (value) && !CONST_INT_P (value))
1199     {
1200       rtx word = gen_lowpart_common (word_mode, value);
1201
1202       if (word && (value != word))
1203         value = word;
1204       else
             /* Otherwise force VALUE into a register first, using
                word_mode for the register when VALUE itself has
                VOIDmode.  */
1205         value = gen_lowpart_common (word_mode,
1206                                     force_reg (GET_MODE (value) != VOIDmode
1207                                                ? GET_MODE (value)
1208                                                : word_mode, value));
1209     }
1210
1211   while (bitsdone < bitsize)
1212     {
1213       unsigned HOST_WIDE_INT thissize;
1214       rtx part, word;
1215       unsigned HOST_WIDE_INT thispos;
1216       unsigned HOST_WIDE_INT offset;
1217
           /* OFFSET is the index, in UNITs, of the unit holding the next
              bit to store; THISPOS is that bit's position inside the
              unit.  */
1218       offset = (bitpos + bitsdone) / unit;
1219       thispos = (bitpos + bitsdone) % unit;
1220
1221       /* When region of bytes we can touch is restricted, decrease
1222          UNIT close to the end of the region as needed.  If op0 is a REG
1223          or SUBREG of REG, don't do this, as there can't be data races
1224          on a register and we can expand shorter code in some cases.  */
1225       if (bitregion_end
1226           && unit > BITS_PER_UNIT
1227           && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1228           && !REG_P (op0)
1229           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1230         {
               /* Retry this iteration with the smaller unit; OFFSET and
                  THISPOS are recomputed at the top of the loop and no
                  bits have been stored yet.  */
1231           unit = unit / 2;
1232           continue;
1233         }
1234
1235       /* THISSIZE must not overrun a word boundary.  Otherwise,
1236          store_fixed_bit_field will call us again, and we will mutually
1237          recurse forever.  */
1238       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1239       thissize = MIN (thissize, unit - thispos);
1240
           /* NOTE(review): the constant masks below shift a HOST_WIDE_INT
              by THISSIZE, which assumes THISSIZE is smaller than the width
              of HOST_WIDE_INT -- confirm.  */
1241       if (BYTES_BIG_ENDIAN)
1242         {
1243           /* Fetch successively less significant portions.  */
1244           if (CONST_INT_P (value))
1245             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1246                              >> (bitsize - bitsdone - thissize))
1247                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1248           else
1249             {
1250               int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1251               /* The args are chosen so that the last part includes the
1252                  lsb.  Give extract_bit_field the value it needs (with
1253                  endianness compensation) to fetch the piece we want.  */
1254               part = extract_fixed_bit_field (word_mode, value, thissize,
1255                                               total_bits - bitsize + bitsdone,
1256                                               NULL_RTX, 1);
1257             }
1258         }
1259       else
1260         {
1261           /* Fetch successively more significant portions.  */
1262           if (CONST_INT_P (value))
1263             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1264                              >> bitsdone)
1265                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1266           else
1267             part = extract_fixed_bit_field (word_mode, value, thissize,
1268                                             bitsdone, NULL_RTX, 1);
1269         }
1270
1271       /* If OP0 is a register, then handle OFFSET here.
1272
1273          When handling multiword bitfields, extract_bit_field may pass
1274          down a word_mode SUBREG of a larger REG for a bitfield that actually
1275          crosses a word boundary.  Thus, for a SUBREG, we must find
1276          the current word starting from the base register.  */
1277       if (GET_CODE (op0) == SUBREG)
1278         {
               /* Word index counted from the start of the inner register:
                  the word the SUBREG begins at plus the words skipped by
                  OFFSET.  */
1279           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1280                             + (offset * unit / BITS_PER_WORD);
1281           machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
               /* If the inner register is narrower than a word, only word 0
                  can be used; any other index is marked with const0_rtx and
                  skipped by the check before the store below.  */
1282           if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1283             word = word_offset ? const0_rtx : op0;
1284           else
1285             word = operand_subword_force (SUBREG_REG (op0), word_offset,
1286                                           GET_MODE (SUBREG_REG (op0)));
               /* Whole words have been accounted for by the word selection
                  above; keep only the unit index inside WORD.  */
1287           offset &= BITS_PER_WORD / unit - 1;
1288         }
1289       else if (REG_P (op0))
1290         {
1291           machine_mode op0_mode = GET_MODE (op0);
               /* Same treatment as the SUBREG case, with OP0 itself as the
                  base register.  */
1292           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1293             word = offset ? const0_rtx : op0;
1294           else
1295             word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1296                                           GET_MODE (op0));
1297           offset &= BITS_PER_WORD / unit - 1;
1298         }
1299       else
             /* For anything else OP0 is used as is and OFFSET is applied
                through the bit position passed below.  */
1300         word = op0;
1301
1302       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1303          it is just an out-of-bounds access.  Ignore it.  */
1304       if (word != const0_rtx)
1305         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1306                                bitregion_start, bitregion_end, part);
1307       bitsdone += thissize;
1308     }
1309 }
1310 \f
1311 /* A subroutine of extract_bit_field_1 that converts return value X
1312    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1313    to extract_bit_field.

        X is returned unchanged if it already has mode MODE or TMODE.
        Otherwise the result is always in TMODE; UNSIGNEDP is passed to
        convert_to_mode for the integer conversion.  */
1314
1315 static rtx
1316 convert_extracted_bit_field (rtx x, machine_mode mode,
1317                              machine_mode tmode, bool unsignedp)
1318 {
1319   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1320     return x;
1321
1322   /* If TMODE is not a scalar integral mode, first convert X to the
1323      integer mode of that size and then access it as a TMODE
1324      value via a SUBREG.  */
1325   if (!SCALAR_INT_MODE_P (tmode))
1326     {
1327       machine_mode smode;
1328
           /* SMODE is the MODE_INT mode with the same number of bits as
              TMODE.  */
1329       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1330       x = convert_to_mode (smode, x, unsignedp);
1331       x = force_reg (smode, x);
1332       return gen_lowpart (tmode, x);
1333     }
1334
1335   return convert_to_mode (tmode, x, unsignedp);
1336 }
1337
1338 /* Try to use an ext(z)v pattern to extract a field from OP0.
1339    Return the extracted value on success, otherwise return null.
1340    EXTV describes the pattern; EXT_MODE, the mode of the extraction, is
1341    EXTV->field_mode.  The other arguments are as for extract_bit_field.

        Null is returned without emitting the pattern when BITSIZE is zero
        or larger than EXT_MODE, or when OP0 is a SUBREG whose mode is not
        EXT_MODE.  */
1342
1343 static rtx
1344 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1345                               unsigned HOST_WIDE_INT bitsize,
1346                               unsigned HOST_WIDE_INT bitnum,
1347                               int unsignedp, rtx target,
1348                               machine_mode mode, machine_mode tmode)
1349 {
1350   struct expand_operand ops[4];
       /* SPEC_TARGET remembers the target that may be returned directly:
          the caller's TARGET, or the fresh TMODE register created below.
          SPEC_TARGET_SUBREG is set when TARGET is replaced by an EXT_MODE
          lowpart of SPEC_TARGET that is wider than SPEC_TARGET itself.  */
1351   rtx spec_target = target;
1352   rtx spec_target_subreg = 0;
1353   machine_mode ext_mode = extv->field_mode;
1354   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1355
1356   if (bitsize == 0 || unit < bitsize)
1357     return NULL_RTX;
1358
1359   if (MEM_P (op0))
1360     /* Get a reference to the first byte of the field.  */
1361     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1362                                 &bitnum);
1363   else
1364     {
1365       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1366       if (BYTES_BIG_ENDIAN)
1367         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1368
1369       /* If op0 is a register, we need it in EXT_MODE to make it
1370          acceptable to the format of ext(z)v.  */
1371       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1372         return NULL_RTX;
1373       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1374         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1375     }
1376
1377   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1378      "backwards" from the size of the unit we are extracting from.
1379      Otherwise, we count bits from the most significant on a
1380      BYTES/BITS_BIG_ENDIAN machine.  */
1381
1382   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1383     bitnum = unit - bitsize - bitnum;
1384
1385   if (target == 0)
1386     target = spec_target = gen_reg_rtx (tmode);
1387
1388   if (GET_MODE (target) != ext_mode)
1389     {
1390       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1391          between the mode of the extraction (word_mode) and the target
1392          mode.  Instead, create a temporary and use convert_move to set
1393          the target.  */
1394       if (REG_P (target)
1395           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1396         {
1397           target = gen_lowpart (ext_mode, target);
1398           if (GET_MODE_PRECISION (ext_mode)
1399               > GET_MODE_PRECISION (GET_MODE (spec_target)))
1400             spec_target_subreg = target;
1401         }
1402       else
1403         target = gen_reg_rtx (ext_mode);
1404     }
1405
       /* Operands of the pattern, in order: output, source, field width
          and field position.  */
1406   create_output_operand (&ops[0], target, ext_mode);
1407   create_fixed_operand (&ops[1], op0);
1408   create_integer_operand (&ops[2], bitsize);
1409   create_integer_operand (&ops[3], bitnum);
1410   if (maybe_expand_insn (extv->icode, 4, ops))
1411     {
           /* The expander's chosen output is read back from ops[0].  If it
              is SPEC_TARGET, or the wider lowpart created from it above,
              SPEC_TARGET already holds the result; otherwise the output is
              converted to the requested mode.  */
1412       target = ops[0].value;
1413       if (target == spec_target)
1414         return target;
1415       if (target == spec_target_subreg)
1416         return spec_target;
1417       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1418     }
1419   return NULL_RTX;
1420 }
1421
1422 /* A subroutine of extract_bit_field, with the same arguments.
1423    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1424    if we can find no other means of implementing the operation.
1425    If FALLBACK_P is false, return NULL instead.  */
1426
1427 static rtx
1428 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1429                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1430                      machine_mode mode, machine_mode tmode,
1431                      bool fallback_p)
1432 {
1433   rtx op0 = str_rtx;
1434   machine_mode int_mode;
1435   machine_mode mode1;
1436
1437   if (tmode == VOIDmode)
1438     tmode = mode;
1439
1440   while (GET_CODE (op0) == SUBREG)
1441     {
1442       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1443       op0 = SUBREG_REG (op0);
1444     }
1445
1446   /* If we have an out-of-bounds access to a register, just return an
1447      uninitialized register of the required mode.  This can occur if the
1448      source code contains an out-of-bounds access to a small array.  */
1449   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1450     return gen_reg_rtx (tmode);
1451
1452   if (REG_P (op0)
1453       && mode == GET_MODE (op0)
1454       && bitnum == 0
1455       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1456     {
1457       /* We're trying to extract a full register from itself.  */
1458       return op0;
1459     }
1460
1461   /* See if we can get a better vector mode before extracting.  */
1462   if (VECTOR_MODE_P (GET_MODE (op0))
1463       && !MEM_P (op0)
1464       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1465     {
1466       machine_mode new_mode;
1467
1468       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1469         new_mode = MIN_MODE_VECTOR_FLOAT;
1470       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1471         new_mode = MIN_MODE_VECTOR_FRACT;
1472       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1473         new_mode = MIN_MODE_VECTOR_UFRACT;
1474       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1475         new_mode = MIN_MODE_VECTOR_ACCUM;
1476       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1477         new_mode = MIN_MODE_VECTOR_UACCUM;
1478       else
1479         new_mode = MIN_MODE_VECTOR_INT;
1480
1481       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1482         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1483             && targetm.vector_mode_supported_p (new_mode))
1484           break;
1485       if (new_mode != VOIDmode)
1486         op0 = gen_lowpart (new_mode, op0);
1487     }
1488
1489   /* Use vec_extract patterns for extracting parts of vectors whenever
1490      available.  */
1491   if (VECTOR_MODE_P (GET_MODE (op0))
1492       && !MEM_P (op0)
1493       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1494       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1495           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1496     {
1497       struct expand_operand ops[3];
1498       machine_mode outermode = GET_MODE (op0);
1499       machine_mode innermode = GET_MODE_INNER (outermode);
1500       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1501       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1502
1503       create_output_operand (&ops[0], target, innermode);
1504       create_input_operand (&ops[1], op0, outermode);
1505       create_integer_operand (&ops[2], pos);
1506       if (maybe_expand_insn (icode, 3, ops))
1507         {
1508           target = ops[0].value;
1509           if (GET_MODE (target) != mode)
1510             return gen_lowpart (tmode, target);
1511           return target;
1512         }
1513     }
1514
1515   /* Make sure we are playing with integral modes.  Pun with subregs
1516      if we aren't.  */
1517   {
1518     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1519     if (imode != GET_MODE (op0))
1520       {
1521         if (MEM_P (op0))
1522           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1523         else if (imode != BLKmode)
1524           {
1525             op0 = gen_lowpart (imode, op0);
1526
1527             /* If we got a SUBREG, force it into a register since we
1528                aren't going to be able to do another SUBREG on it.  */
1529             if (GET_CODE (op0) == SUBREG)
1530               op0 = force_reg (imode, op0);
1531           }
1532         else if (REG_P (op0))
1533           {
1534             rtx reg, subreg;
1535             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1536                                             MODE_INT);
1537             reg = gen_reg_rtx (imode);
1538             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1539             emit_move_insn (subreg, op0);
1540             op0 = reg;
1541             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1542           }
1543         else
1544           {
1545             HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1546             rtx mem = assign_stack_temp (GET_MODE (op0), size);
1547             emit_move_insn (mem, op0);
1548             op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1549           }
1550       }
1551   }
1552
1553   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1554      If that's wrong, the solution is to test for it and set TARGET to 0
1555      if needed.  */
1556
1557   /* Get the mode of the field to use for atomic access or subreg
1558      conversion.  */
1559   mode1 = mode;
1560   if (SCALAR_INT_MODE_P (tmode))
1561     {
1562       machine_mode try_mode = mode_for_size (bitsize,
1563                                                   GET_MODE_CLASS (tmode), 0);
1564       if (try_mode != BLKmode)
1565         mode1 = try_mode;
1566     }
1567   gcc_assert (mode1 != BLKmode);
1568
1569   /* Extraction of a full MODE1 value can be done with a subreg as long
1570      as the least significant bit of the value is the least significant
1571      bit of either OP0 or a word of OP0.  */
1572   if (!MEM_P (op0)
1573       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1574       && bitsize == GET_MODE_BITSIZE (mode1)
1575       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1576     {
1577       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1578                                      bitnum / BITS_PER_UNIT);
1579       if (sub)
1580         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1581     }
1582
1583   /* Extraction of a full MODE1 value can be done with a load as long as
1584      the field is on a byte boundary and is sufficiently aligned.  */
1585   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1586     {
1587       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1588       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1589     }
1590
1591   /* Handle fields bigger than a word.  */
1592
1593   if (bitsize > BITS_PER_WORD)
1594     {
1595       /* Here we transfer the words of the field
1596          in the order least significant first.
1597          This is because the most significant word is the one which may
1598          be less than full.  */
1599
1600       unsigned int backwards = WORDS_BIG_ENDIAN;
1601       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1602       unsigned int i;
1603       rtx_insn *last;
1604
1605       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1606         target = gen_reg_rtx (mode);
1607
1608       /* Indicate for flow that the entire target reg is being set.  */
1609       emit_clobber (target);
1610
1611       last = get_last_insn ();
1612       for (i = 0; i < nwords; i++)
1613         {
1614           /* If I is 0, use the low-order word in both field and target;
1615              if I is 1, use the next to lowest word; and so on.  */
1616           /* Word number in TARGET to use.  */
1617           unsigned int wordnum
1618             = (backwards
1619                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1620                : i);
1621           /* Offset from start of field in OP0.  */
1622           unsigned int bit_offset = (backwards
1623                                      ? MAX ((int) bitsize - ((int) i + 1)
1624                                             * BITS_PER_WORD,
1625                                             0)
1626                                      : (int) i * BITS_PER_WORD);
1627           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1628           rtx result_part
1629             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1630                                              bitsize - i * BITS_PER_WORD),
1631                                    bitnum + bit_offset, 1, target_part,
1632                                    mode, word_mode, fallback_p);
1633
1634           gcc_assert (target_part);
1635           if (!result_part)
1636             {
1637               delete_insns_since (last);
1638               return NULL;
1639             }
1640
1641           if (result_part != target_part)
1642             emit_move_insn (target_part, result_part);
1643         }
1644
1645       if (unsignedp)
1646         {
1647           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1648              need to be zero'd out.  */
1649           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1650             {
1651               unsigned int i, total_words;
1652
1653               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1654               for (i = nwords; i < total_words; i++)
1655                 emit_move_insn
1656                   (operand_subword (target,
1657                                     backwards ? total_words - i - 1 : i,
1658                                     1, VOIDmode),
1659                    const0_rtx);
1660             }
1661           return target;
1662         }
1663
1664       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1665       target = expand_shift (LSHIFT_EXPR, mode, target,
1666                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1667       return expand_shift (RSHIFT_EXPR, mode, target,
1668                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1669     }
1670
1671   /* If OP0 is a multi-word register, narrow it to the affected word.
1672      If the region spans two words, defer to extract_split_bit_field.  */
1673   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1674     {
1675       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1676                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1677       bitnum %= BITS_PER_WORD;
1678       if (bitnum + bitsize > BITS_PER_WORD)
1679         {
1680           if (!fallback_p)
1681             return NULL_RTX;
1682           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1683           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1684         }
1685     }
1686
1687   /* From here on we know the desired field is smaller than a word.
1688      If OP0 is a register, it too fits within a word.  */
1689   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1690   extraction_insn extv;
1691   if (!MEM_P (op0)
1692       /* ??? We could limit the structure size to the part of OP0 that
1693          contains the field, with appropriate checks for endianness
1694          and TRULY_NOOP_TRUNCATION.  */
1695       && get_best_reg_extraction_insn (&extv, pattern,
1696                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1697                                        tmode))
1698     {
1699       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1700                                                  unsignedp, target, mode,
1701                                                  tmode);
1702       if (result)
1703         return result;
1704     }
1705
1706   /* If OP0 is a memory, try copying it to a register and seeing if a
1707      cheap register alternative is available.  */
1708   if (MEM_P (op0))
1709     {
1710       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1711                                         tmode))
1712         {
1713           rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1714                                                      bitnum, unsignedp,
1715                                                      target, mode,
1716                                                      tmode);
1717           if (result)
1718             return result;
1719         }
1720
1721       rtx_insn *last = get_last_insn ();
1722
1723       /* Try loading part of OP0 into a register and extracting the
1724          bitfield from that.  */
1725       unsigned HOST_WIDE_INT bitpos;
1726       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1727                                                0, 0, tmode, &bitpos);
1728       if (xop0)
1729         {
1730           xop0 = copy_to_reg (xop0);
1731           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1732                                             unsignedp, target,
1733                                             mode, tmode, false);
1734           if (result)
1735             return result;
1736           delete_insns_since (last);
1737         }
1738     }
1739
1740   if (!fallback_p)
1741     return NULL;
1742
1743   /* Find a correspondingly-sized integer field, so we can apply
1744      shifts and masks to it.  */
1745   int_mode = int_mode_for_mode (tmode);
1746   if (int_mode == BLKmode)
1747     int_mode = int_mode_for_mode (mode);
1748   /* Should probably push op0 out to memory and then do a load.  */
1749   gcc_assert (int_mode != BLKmode);
1750
1751   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1752                                     target, unsignedp);
1753   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1754 }
1755
1756 /* Emit RTL that reads the bit field of BITSIZE bits located BITNUM bits
1757    into STR_RTX, and return the rtx that holds the extracted value.
1758
1759    STR_RTX is the containing object (a REG or MEM).
1760    UNSIGNEDP is nonzero when the field is unsigned.
1761    TARGET, when nonzero, is a suggested place for the result; it is
1762    used and returned only if storing there costs nothing extra.
1763    MODE is the natural mode of the extracted field value.
1764    TMODE is the mode the caller would prefer for the result; the
1765    value may nevertheless come back in MODE.
1766
1767    When TARGET is not used, the result is a REG of mode TMODE or MODE,
1768    TMODE being preferred if both are equally easy.  Fields accepted by
1769    strict_volatile_bitfield_p (-fstrict-volatile-bitfields) are handled
1770    here; everything else is passed on to extract_bit_field_1.  */
1771
1772 rtx
1773 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1774                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1775                    machine_mode mode, machine_mode tmode)
1776 {
1777   /* Pick the mode that the -fstrict-volatile-bitfields test is made
1778      against: STR_RTX's mode if it has a size, else TARGET's mode if it
1779      has one, else TMODE.  */
1780   machine_mode vol_mode = tmode;
1781   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1782     vol_mode = GET_MODE (str_rtx);
1783   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1784     vol_mode = GET_MODE (target);
1785
1786   /* Outside the strict-volatile case, defer to the general extractor.  */
1787   if (!strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, vol_mode, 0, 0))
1788     return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1789                                 target, mode, tmode, true);
1790
1791   rtx field;
1792   if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, vol_mode))
1793     /* A full VOL_MODE value on a byte boundary with sufficient
1794        alignment: a plain load does the job.  */
1795     field = adjust_bitfield_address (str_rtx, vol_mode,
1796                                      bitnum / BITS_PER_UNIT);
1797   else
1798     {
1799       /* Otherwise narrow the MEM to VOL_MODE (BITNUM is passed by
1800          address so it can be adjusted) and extract in that mode.  */
1801       str_rtx = narrow_bit_field_mem (str_rtx, vol_mode, bitsize, bitnum,
1802                                       &bitnum);
1803       field = extract_fixed_bit_field_1 (mode, str_rtx, bitsize, bitnum,
1804                                          target, unsignedp);
1805     }
1806
1807   /* Hand the value to convert_extracted_bit_field for MODE / TMODE.  */
1808   return convert_extracted_bit_field (field, mode, tmode, unsignedp);
1809 }
1810 \f
1811 /* Extract the BITSIZE-bit field that starts at bit BITNUM of OP0 by
1812    means of shifts and boolean operations.
1813
1814    UNSIGNEDP is nonzero if the field is unsigned (no sign extension).
1815    TARGET, if nonzero, is where we try to put the value; it may or may
1816    not end up being used and returned.  When it is not used, the value
1817    goes in a new pseudo-reg of mode TMODE.  */
1818
1819 static rtx
1820 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1821                          unsigned HOST_WIDE_INT bitsize,
1822                          unsigned HOST_WIDE_INT bitnum, rtx target,
1823                          int unsignedp)
1824 {
1825   /* Anything that is not a MEM is extracted in its own mode as is.  */
1826   if (!MEM_P (op0))
1827     return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1828                                       target, unsignedp);
1829
1830   /* Ask get_best_mode for an access mode, with word_mode as the limit.  */
1831   machine_mode best = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1832                                      word_mode, MEM_VOLATILE_P (op0));
1833
1834   /* No mode should only mean the field spans a word boundary.  */
1835   if (best == VOIDmode)
1836     return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1837
1838   op0 = narrow_bit_field_mem (op0, best, bitsize, bitnum, &bitnum);
1839   return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1840                                     target, unsignedp);
1841 }
1842
1843 /* Helper function for extract_fixed_bit_field (same arguments); extracts
1844    the bit field always using the MODE of OP0, a scalar integer mode.  */
1845
1846 static rtx
1847 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1848                            unsigned HOST_WIDE_INT bitsize,
1849                            unsigned HOST_WIDE_INT bitnum, rtx target,
1850                            int unsignedp)
1851 {
1852   machine_mode mode = GET_MODE (op0);
1853   gcc_assert (SCALAR_INT_MODE_P (mode));
1854
1855   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1856      for invalid input, such as extract equivalent of f5 from
1857      gcc.dg/pr48335-2.c.  */
1858
1859   if (BYTES_BIG_ENDIAN)
1860     /* BITNUM is the distance between our msb and that of OP0.
1861        Convert it to the distance from the lsb.  */
1862     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1863
1864   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1865      We have reduced the big-endian case to the little-endian case.  */
1866
1867   if (unsignedp)
1868     {
1869       if (bitnum)
1870         {
1871           /* If the field does not already start at the lsb, shift it
1872              right by BITNUM so that it does.  */
1873           /* Pass TARGET to the shift only if it is a REG and TMODE is MODE.  */
1874           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1875           if (tmode != mode)
1876             subtarget = 0;
1877           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1878         }
1879       /* Convert the value to the desired mode, TMODE, as unsigned.  */
1880       if (mode != tmode)
1881         op0 = convert_to_mode (tmode, op0, 1);
1882
1883       /* Unless the msb of the field used to be the msb when we shifted,
1884          mask out the upper bits, keeping only the low BITSIZE bits.  */
1885
1886       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1887         return expand_binop (GET_MODE (op0), and_optab, op0,
1888                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1889                              target, 1, OPTAB_LIB_WIDEN);
1890       return op0;
1891     }
1892
1893   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1894      then arithmetic-shift its lsb to the lsb of the word.  */
1895   op0 = force_reg (mode, op0);
1896
1897   /* Find the narrowest integer mode with at least BITSIZE + BITNUM bits.  */
1898
1899   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1900        mode = GET_MODE_WIDER_MODE (mode))
1901     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1902       {
1903         op0 = convert_to_mode (mode, op0, 0);
1904         break;
1905       }
1906
1907   if (mode != tmode)
1908     target = 0;  /* Do not use TARGET when the working mode is not TMODE.  */
1909
1910   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1911     {
1912       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1913       /* Pass TARGET to the left shift only if it is a REG.  */
1914       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1915       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1916     }
1917
1918   return expand_shift (RSHIFT_EXPR, mode, op0,
1919                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1920 }
1921
1922 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx of mode MODE
1923    with the value VALUE << BITPOS.  */
1924
1925 static rtx
1926 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
1927               int bitpos)
1928 {
1929   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
1930 }
1931 \f
1932 /* Extract a bit field that is split across two words
1933    and return an RTX for the result, which is computed in word_mode.
1934
1935    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1936    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1937    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1938
1939 static rtx
1940 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1941                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1942 {
1943   unsigned int unit;
1944   unsigned int bitsdone = 0;
1945   rtx result = NULL_RTX;
1946   int first = 1;
1947
1948   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1949      much at a time.  For a MEM it is further limited by the alignment.  */
1950   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1951     unit = BITS_PER_WORD;
1952   else
1953     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1954
1955   while (bitsdone < bitsize)
1956     {
1957       unsigned HOST_WIDE_INT thissize;
1958       rtx part, word;
1959       unsigned HOST_WIDE_INT thispos;
1960       unsigned HOST_WIDE_INT offset;
1961
1962       offset = (bitpos + bitsdone) / unit;
1963       thispos = (bitpos + bitsdone) % unit;
1964
1965       /* THISSIZE must not overrun a word boundary.  Otherwise,
1966          extract_fixed_bit_field will call us again, and we will mutually
1967          recurse forever.  */
1968       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1969       thissize = MIN (thissize, unit - thispos);
1970
1971       /* If OP0 is a register, then handle OFFSET here.
1972
1973          When handling multiword bitfields, extract_bit_field may pass
1974          down a word_mode SUBREG of a larger REG for a bitfield that actually
1975          crosses a word boundary.  Thus, for a SUBREG, we must find
1976          the current word starting from the base register.  */
1977       if (GET_CODE (op0) == SUBREG)
1978         {
1979           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1980           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1981                                         GET_MODE (SUBREG_REG (op0)));
1982           offset = 0;
1983         }
1984       else if (REG_P (op0))
1985         {
1986           word = operand_subword_force (op0, offset, GET_MODE (op0));
1987           offset = 0;
1988         }
1989       else
1990         word = op0;
1991
1992       /* Extract the parts, each as an unsigned field, in bit-counting order,
1993          whose meaning is determined by BYTES_BIG_ENDIAN.
1994          OFFSET is in UNITs, and UNIT is in bits.  */
1995       part = extract_fixed_bit_field (word_mode, word, thissize,
1996                                       offset * unit + thispos, 0, 1);
1997       bitsdone += thissize;
1998
1999       /* Shift this part into place for the result: left by the bits still
             to come if BYTES_BIG_ENDIAN, else by the bits already done.  */
2000       if (BYTES_BIG_ENDIAN)
2001         {
2002           if (bitsize != bitsdone)
2003             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2004                                  bitsize - bitsdone, 0, 1);
2005         }
2006       else
2007         {
2008           if (bitsdone != thissize)
2009             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2010                                  bitsdone - thissize, 0, 1);
2011         }
2012
2013       if (first)
2014         result = part;
2015       else
2016         /* Combine the parts with bitwise or.  This works
2017            because we extracted each part as an unsigned bit field.  */
2018         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2019                                OPTAB_LIB_WIDEN);
2020
2021       first = 0;
2022     }
2023
2024   /* Unsigned bit field: we are done.  */
2025   if (unsignedp)
2026     return result;
2027   /* Signed bit field: shift it to the top of the word, then arithmetically
             back down, which sign-extends it.  */
2028   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2029                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2030   return expand_shift (RSHIFT_EXPR, word_mode, result,
2031                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2032 }
2033 \f
2034 /* Read the low-order bits of SRC, whose mode is SRC_MODE, as an rvalue
2035    of mode MODE with the bit pattern preserved.  When SRC_MODE is the
2036    narrower of the two, the upper bits of the result are zero.
2037
2038    Return the value on success.  Return null if the layout of either
2039    mode is unknown (as for CC modes) or if the extraction would need
2040    unprofitable mode punning.
2041
2042    Unlike the gen_lowpart* routines:
2043
2044      - the result must always be treated as an rvalue
2045
2046      - a MODE wider than SRC_MODE implies a zero extension
2047
2048      - a MODE narrower than SRC_MODE implies a truncation (and is
2049        therefore subject to TRULY_NOOP_TRUNCATION).
2050
2051    Put differently, this routine performs a computation, while the
2052    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2053    operations.  */
2054
2055 rtx
2056 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2057 {
2058   if (mode == src_mode)
2059     return src;
2060
2061   if (CONSTANT_P (src))
2062     {
2063       /* Fold through simplify_subreg rather than simplify_gen_subreg:
2064          when folding fails the latter would happily build an invalid
2065          SUBREG such as (subreg (symbol_ref)).  */
2066       unsigned int lowpart = subreg_lowpart_offset (mode, src_mode);
2067       rtx folded = simplify_subreg (mode, src, src_mode, lowpart);
2068       if (folded)
2069         return folded;
2070
2071       /* Failing that, force the constant into a register and wrap it in
2072          a SUBREG, provided it has a mode and the SUBREG validates.  */
2073       if (GET_MODE (src) != VOIDmode
2074           && validate_subreg (mode, src_mode, src, lowpart))
2075         {
2076           rtx reg = force_reg (GET_MODE (src), src);
2077           return gen_rtx_SUBREG (mode, reg, lowpart);
2078         }
2079       return NULL_RTX;
2080     }
2081
2082   /* The layout of CC modes is unknown, so give up on them.  */
2083   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2084     return NULL_RTX;
2085
2086   /* Same-sized, tieable modes: try a plain lowpart first.  */
2087   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2088       && MODES_TIEABLE_P (mode, src_mode))
2089     {
2090       rtx direct = gen_lowpart_common (mode, src);
2091       if (direct)
2092         return direct;
2093     }
2094
2095   /* Otherwise pun through the integer modes from int_mode_for_mode.  */
2096   machine_mode src_int = int_mode_for_mode (src_mode);
2097   machine_mode dst_int = int_mode_for_mode (mode);
2098   if (src_int == BLKmode || dst_int == BLKmode
2099       || !MODES_TIEABLE_P (src_int, src_mode)
2100       || !MODES_TIEABLE_P (dst_int, mode))
2101     return NULL_RTX;
2102
2103   rtx bits = gen_lowpart (src_int, src);
2104   bits = convert_modes (dst_int, src_int, bits, true);
2105   return gen_lowpart (mode, bits);
2106 }
2107 \f
2108 /* Add INC to TARGET in place, copying the sum back if need be.  */
2109
2110 void
2111 expand_inc (rtx target, rtx inc)
2112 {
2113   rtx sum = expand_binop (GET_MODE (target), add_optab, target, inc,
2114                           target, 0, OPTAB_LIB_WIDEN);
2115   if (sum == target)
2116     return;
2117   emit_move_insn (target, sum);
2118 }
2119
2120 /* Subtract DEC from TARGET in place, copying the result back if need be.  */
2121
2122 void
2123 expand_dec (rtx target, rtx dec)
2124 {
2125   rtx diff = expand_binop (GET_MODE (target), sub_optab, target, dec,
2126                            target, 0, OPTAB_LIB_WIDEN);
2127   if (diff == target)
2128     return;
2129   emit_move_insn (target, diff);
2130 }
2131 \f
2132 /* Output a shift or rotate instruction for expression code CODE
2133    (LSHIFT_EXPR, RSHIFT_EXPR, LROTATE_EXPR or RROTATE_EXPR), with SHIFTED
2134    being the rtx of mode MODE for the value to shift, and AMOUNT the rtx
2135    for the amount to shift by.  Store the result in the rtx TARGET, if that
2136    is convenient.  If UNSIGNEDP is nonzero, do a logical shift; otherwise,
2137    arithmetic.  Return the rtx for where the value is.  */
2138
2139 static rtx
2140 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2141                 rtx amount, rtx target, int unsignedp)
2142 {
2143   rtx op1, temp = 0;
2144   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2145   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2146   optab lshift_optab = ashl_optab;
2147   optab rshift_arith_optab = ashr_optab;
2148   optab rshift_uns_optab = lshr_optab;
2149   optab lrotate_optab = rotl_optab;
2150   optab rrotate_optab = rotr_optab;
2151   machine_mode op1_mode;
2152   machine_mode scalar_mode = mode;
2153   int attempt;
2154   bool speed = optimize_insn_for_speed_p ();
2155
2156   if (VECTOR_MODE_P (mode))
2157     scalar_mode = GET_MODE_INNER (mode);
2158   op1 = amount;
2159   op1_mode = GET_MODE (op1);
2160
2161   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2162      shift amount is a vector, use the vector/vector shift patterns.  */
2163   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2164     {
2165       lshift_optab = vashl_optab;
2166       rshift_arith_optab = vashr_optab;
2167       rshift_uns_optab = vlshr_optab;
2168       lrotate_optab = vrotl_optab;
2169       rrotate_optab = vrotr_optab;
2170     }
2171
2172   /* Previously detected shift-counts computed by NEGATE_EXPR
2173      and shifted in the other direction; but that does not work
2174      on all machines.  */
2175
2176   if (SHIFT_COUNT_TRUNCATED)
2177     {
2178       if (CONST_INT_P (op1)
2179           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2180               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2181         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2182                        % GET_MODE_BITSIZE (scalar_mode));
2183       else if (GET_CODE (op1) == SUBREG
2184                && subreg_lowpart_p (op1)
2185                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2186                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2187         op1 = SUBREG_REG (op1);
2188     }
2189
2190   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2191      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2192      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2193      amount instead.  */
2194   if (rotate
2195       && CONST_INT_P (op1)
2196       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2197                    GET_MODE_BITSIZE (scalar_mode) - 1))
2198     {
2199       op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2200       left = !left;
2201       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2202     }
2203
2204   /* Rotating a 16-bit value by 8 bits is a byte swap (not true for wider
2205      values: rotating 0x01020304 by 16 gives 0x03040102, whereas bswapsi
2206      gives 0x04030201).  Expand the bswap in MODE, not HImode, so that a
2207      vector of 16-bit elements is not byte-swapped as a scalar.  */
2208   if (rotate
2209       && CONST_INT_P (op1)
2210       && INTVAL (op1) == BITS_PER_UNIT
2211       && GET_MODE_SIZE (scalar_mode) == 2
2212       && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2213     return expand_unop (mode, bswap_optab, shifted, NULL_RTX,
2214                         unsignedp);
2215
2216   if (op1 == const0_rtx)
2217     return shifted;
2218
2219   /* Check whether it is cheaper to implement a left shift by a constant
2220      bit count by a sequence of additions.  */
2221   if (code == LSHIFT_EXPR
2222       && CONST_INT_P (op1)
2223       && INTVAL (op1) > 0
2224       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2225       && INTVAL (op1) < MAX_BITS_PER_WORD
2226       && (shift_cost (speed, mode, INTVAL (op1))
2227           > INTVAL (op1) * add_cost (speed, mode))
2228       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2229     {
2230       int i;
2231       for (i = 0; i < INTVAL (op1); i++)
2232         {
2233           temp = force_reg (mode, shifted);
2234           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2235                                   unsignedp, OPTAB_LIB_WIDEN);
2236         }
2237       return shifted;
2238     }
2239
2240   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2241     {
2242       enum optab_methods methods;
2243
2244       if (attempt == 0)
2245         methods = OPTAB_DIRECT;
2246       else if (attempt == 1)
2247         methods = OPTAB_WIDEN;
2248       else
2249         methods = OPTAB_LIB_WIDEN;
2250
2251       if (rotate)
2252         {
2253           /* Widening does not work for rotation.  */
2254           if (methods == OPTAB_WIDEN)
2255             continue;
2256           else if (methods == OPTAB_LIB_WIDEN)
2257             {
2258               /* If we have been unable to open-code this by a rotation,
2259                  do it as the IOR of two shifts.  I.e., to rotate A
2260                  by N bits, compute
2261                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2262                  where C is the bitsize of A.
2263
2264                  It is theoretically possible that the target machine might
2265                  not be able to perform either shift and hence we would
2266                  be making two libcalls rather than just the one for the
2267                  shift (similarly if IOR could not be done).  We will allow
2268                  this extremely unlikely lossage to avoid complicating the
2269                  code below.  */
2270
2271               rtx subtarget = target == shifted ? 0 : target;
2272               rtx new_amount, other_amount;
2273               rtx temp1;
2274
2275               new_amount = op1;
2276               if (op1 == const0_rtx)
2277                 return shifted;
2278               else if (CONST_INT_P (op1))
2279                 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2280                                         - INTVAL (op1));
2281               else
2282                 {
2283                   other_amount
2284                     = simplify_gen_unary (NEG, GET_MODE (op1),
2285                                           op1, GET_MODE (op1));
2286                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2287                   other_amount
2288                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2289                                            gen_int_mode (mask, GET_MODE (op1)));
2290                 }
2291
2292               shifted = force_reg (mode, shifted);
2293
2294               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2295                                      mode, shifted, new_amount, 0, 1);
2296               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2297                                       mode, shifted, other_amount,
2298                                       subtarget, 1);
2299               return expand_binop (mode, ior_optab, temp, temp1, target,
2300                                    unsignedp, methods);
2301             }
2302
2303           temp = expand_binop (mode,
2304                                left ? lrotate_optab : rrotate_optab,
2305                                shifted, op1, target, unsignedp, methods);
2306         }
2307       else if (unsignedp)
2308         temp = expand_binop (mode,
2309                              left ? lshift_optab : rshift_uns_optab,
2310                              shifted, op1, target, unsignedp, methods);
2311
2312       /* Do arithmetic shifts.
2313          Also, if we are going to widen the operand, we can just as well
2314          use an arithmetic right-shift instead of a logical one.  */
2315       if (temp == 0 && ! rotate
2316           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2317         {
2318           enum optab_methods methods1 = methods;
2319
2320           /* If trying to widen a log shift to an arithmetic shift,
2321              don't accept an arithmetic shift of the same size.  */
2322           if (unsignedp)
2323             methods1 = OPTAB_MUST_WIDEN;
2324
2325           /* Arithmetic shift */
2326
2327           temp = expand_binop (mode,
2328                                left ? lshift_optab : rshift_arith_optab,
2329                                shifted, op1, target, unsignedp, methods1);
2330         }
2331
2332       /* We used to try extzv here for logical right shifts, but that was
2333          only useful for one machine, the VAX, and caused poor code
2334          generation there for lshrdi3, so the code was deleted and a
2335          define_expand for lshrsi3 was added to vax.md.  */
2336     }
2337
2338   gcc_assert (temp);
2339   return temp;
2340 }
2341
2342 /* Emit a shift of SHIFTED, a value of mode MODE, by the integer
2343    constant AMOUNT.  CODE is the tree code naming the operation.
2344    TARGET is only a suggestion for where to put the result.
2345    A nonzero UNSIGNEDP requests a logical shift, zero an arithmetic one.
2346    AMOUNT is turned into an rtx with GEN_INT and the work is handed to
2347    expand_shift_1; the rtx holding the result is returned.  */
2348
2349 rtx
2350 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2351               int amount, rtx target, int unsignedp)
2352 {
2353   rtx count = GEN_INT (amount);
2354   return expand_shift_1 (code, mode, shifted, count, target, unsignedp);
2355 }
2356
2357 /* Emit a shift of SHIFTED, a value of mode MODE, by a count that is
2358    given as the tree expression AMOUNT rather than as an integer.
2359    CODE is the tree code naming the operation; TARGET is only a
2360    suggestion for where to put the result.  A nonzero UNSIGNEDP
2361    requests a logical shift, zero an arithmetic one.  AMOUNT is
2362    expanded to rtl first; the rtx holding the result is returned.  */
2363
2364 rtx
2365 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2366                        tree amount, rtx target, int unsignedp)
2367 {
2368   rtx count = expand_normal (amount);
2369   return expand_shift_1 (code, mode, shifted, count, target, unsignedp);
2370 }
2371
2372 \f
2373 /* The final fixup that expand_mult_const applies after the shift/add
2374    sequence picked by choose_mult_variant.  BASIC_VARIANT means no fixup
2375    is needed, NEGATE_VARIANT means that the result should be negated, and
2376    ADD_VARIANT means that the multiplicand should be added to the result.  */
2377 enum mult_variant {basic_variant, negate_variant, add_variant};
2378
2379 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2380                         const struct mult_cost *, machine_mode mode);
2381 static bool choose_mult_variant (machine_mode, HOST_WIDE_INT,
2382                                  struct algorithm *, enum mult_variant *, int);
2383 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2384                               const struct algorithm *, enum mult_variant);
2385 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2386 static rtx extract_high_half (machine_mode, rtx);
2387 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2388 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2389                                        int, int);
2390 /* Compute the best algorithm for multiplying by T and store it in
2391    *ALG_OUT.  The algorithm must cost less than COST_LIMIT.
2392    If ALG_OUT->cost is not less than COST_LIMIT on return, no algorithm
2393    was found and all other fields of *ALG_OUT are undefined.
2394    MODE is the machine mode of the multiplication.  */
2395
2396 static void
2397 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2398             const struct mult_cost *cost_limit, machine_mode mode)
2399 {
2400   int m;
2401   struct algorithm *alg_in, *best_alg;
2402   struct mult_cost best_cost;
2403   struct mult_cost new_limit;
2404   int op_cost, op_latency;
2405   unsigned HOST_WIDE_INT orig_t = t;
2406   unsigned HOST_WIDE_INT q;
2407   int maxm, hash_index;
2408   bool cache_hit = false;
2409   enum alg_code cache_alg = alg_zero;
2410   bool speed = optimize_insn_for_speed_p ();
2411   machine_mode imode;
2412   struct alg_hash_entry *entry_ptr;
2413
2414   /* Indicate that no algorithm is yet found.  If no algorithm
2415      is found, this value will be returned and indicate failure.  */
2416   alg_out->cost.cost = cost_limit->cost + 1;
2417   alg_out->cost.latency = cost_limit->latency + 1;
2418
2419   if (cost_limit->cost < 0
2420       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2421     return;
2422
2423   /* Be prepared for vector modes.  */
2424   imode = GET_MODE_INNER (mode);
2425   if (imode == VOIDmode)
2426     imode = mode;
2427
2428   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2429
2430   /* Restrict the bits of "t" to the multiplication's mode.  */
2431   t &= GET_MODE_MASK (imode);
2432
2433   /* t == 1 can be done in zero cost.  */
2434   if (t == 1)
2435     {
2436       alg_out->ops = 1;
2437       alg_out->cost.cost = 0;
2438       alg_out->cost.latency = 0;
2439       alg_out->op[0] = alg_m;
2440       return;
2441     }
2442
2443   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2444      fail now.  */
2445   if (t == 0)
2446     {
2447       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2448         return;
2449       else
2450         {
2451           alg_out->ops = 1;
2452           alg_out->cost.cost = zero_cost (speed);
2453           alg_out->cost.latency = zero_cost (speed);
2454           alg_out->op[0] = alg_zero;
2455           return;
2456         }
2457     }
2458
2459   /* We'll be needing a couple extra algorithm structures now.  */
2460
2461   alg_in = XALLOCA (struct algorithm);
2462   best_alg = XALLOCA (struct algorithm);
2463   best_cost = *cost_limit;
2464
2465   /* Compute the hash index.  */
2466   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2467
2468   /* See if we already know what to do for T.  The slot is only usable
2469      if it was recorded for this same T, MODE and SPEED.  */
2470   entry_ptr = alg_hash_entry_ptr (hash_index);
2471   if (entry_ptr->t == t
2472       && entry_ptr->mode == mode
2473       && entry_ptr->speed == speed
2474       && entry_ptr->alg != alg_unknown)
2475     {
2476       cache_alg = entry_ptr->alg;
2477
2478       if (cache_alg == alg_impossible)
2479         {
2480           /* The cache tells us that it's impossible to synthesize
2481              multiplication by T within entry_ptr->cost.  */
2482           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2483             /* COST_LIMIT is at least as restrictive as the one
2484                recorded in the hash table, in which case we have no
2485                hope of synthesizing a multiplication.  Just
2486                return.  */
2487             return;
2488
2489           /* If we get here, COST_LIMIT is less restrictive than the
2490              one recorded in the hash table, so we may be able to
2491              synthesize a multiplication.  Proceed as if we didn't
2492              have the cache entry.  */
2493         }
2494       else
2495         {
2496           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2497             /* The cached algorithm shows that this multiplication
2498                requires more cost than COST_LIMIT.  Just return.  This
2499                way, we don't clobber this cache entry with
2500                alg_impossible but retain useful information.  */
2501             return;
2502
2503           cache_hit = true;
2504
2505           switch (cache_alg)
2506             {
2507             case alg_shift:
2508               goto do_alg_shift;
2509
2510             case alg_add_t_m2:
2511             case alg_sub_t_m2:
2512               goto do_alg_addsub_t_m2;
2513
2514             case alg_add_factor:
2515             case alg_sub_factor:
2516               goto do_alg_addsub_factor;
2517
2518             case alg_add_t2_m:
2519               goto do_alg_add_t2_m;
2520
2521             case alg_sub_t2_m:
2522               goto do_alg_sub_t2_m;
2523
2524             default:
2525               gcc_unreachable ();
2526             }
2527         }
2528     }
2529
2530   /* If we have a group of zero bits at the low-order part of T, try
2531      multiplying by the remaining bits and then doing a shift.  */
2532
2533   if ((t & 1) == 0)
2534     {
2535     do_alg_shift:
2536       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2537       if (m < maxm)
2538         {
2539           q = t >> m;
2540           /* The function expand_shift will choose between a shift and
2541              a sequence of additions, so the observed cost is given as
2542              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2543           op_cost = m * add_cost (speed, mode);
2544           if (shift_cost (speed, mode, m) < op_cost)
2545             op_cost = shift_cost (speed, mode, m);
2546           new_limit.cost = best_cost.cost - op_cost;
2547           new_limit.latency = best_cost.latency - op_cost;
2548           synth_mult (alg_in, q, &new_limit, mode);
2549
2550           alg_in->cost.cost += op_cost;
2551           alg_in->cost.latency += op_cost;
2552           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2553             {
2554               struct algorithm *x;
2555               best_cost = alg_in->cost;
2556               x = alg_in, alg_in = best_alg, best_alg = x;
2557               best_alg->log[best_alg->ops] = m;
2558               best_alg->op[best_alg->ops] = alg_shift;
2559             }
2560
2561           /* See if treating ORIG_T as a signed number yields a better
2562              sequence.  Try this sequence only for a negative ORIG_T
2563              as it would be useless for a non-negative ORIG_T.  */
2564           if ((HOST_WIDE_INT) orig_t < 0)
2565             {
2566               /* Shift ORIG_T as follows because a right shift of a
2567                  negative-valued signed type is implementation
2568                  defined.  */
2569               q = ~(~orig_t >> m);
2570               /* The function expand_shift will choose between a shift
2571                  and a sequence of additions, so the observed cost is
2572                  given as MIN (m * add_cost(speed, mode),
2573                  shift_cost(speed, mode, m)).  */
2574               op_cost = m * add_cost (speed, mode);
2575               if (shift_cost (speed, mode, m) < op_cost)
2576                 op_cost = shift_cost (speed, mode, m);
2577               new_limit.cost = best_cost.cost - op_cost;
2578               new_limit.latency = best_cost.latency - op_cost;
2579               synth_mult (alg_in, q, &new_limit, mode);
2580
2581               alg_in->cost.cost += op_cost;
2582               alg_in->cost.latency += op_cost;
2583               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2584                 {
2585                   struct algorithm *x;
2586                   best_cost = alg_in->cost;
2587                   x = alg_in, alg_in = best_alg, best_alg = x;
2588                   best_alg->log[best_alg->ops] = m;
2589                   best_alg->op[best_alg->ops] = alg_shift;
2590                 }
2591             }
2592         }
2593       if (cache_hit)
2594         goto done;
2595     }
2596
2597   /* If we have an odd number, add or subtract one.  */
2598   if ((t & 1) != 0)
2599     {
2600       unsigned HOST_WIDE_INT w;
2601
2602     do_alg_addsub_t_m2:
2603       for (w = 1; (w & t) != 0; w <<= 1)
2604         ;
2605       /* If T was -1, then W will be zero after the loop.  This is another
2606          case where T ends with ...111.  Handling this with (T + 1) and
2607          subtract 1 produces slightly better code and results in algorithm
2608          selection much faster than treating it like the ...0111 case
2609          below.  */
2610       if (w == 0
2611           || (w > 2
2612               /* Reject the case where t is 3.
2613                  Thus we prefer addition in that case.  */
2614               && t != 3))
2615         {
2616           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2617
2618           op_cost = add_cost (speed, mode);
2619           new_limit.cost = best_cost.cost - op_cost;
2620           new_limit.latency = best_cost.latency - op_cost;
2621           synth_mult (alg_in, t + 1, &new_limit, mode);
2622
2623           alg_in->cost.cost += op_cost;
2624           alg_in->cost.latency += op_cost;
2625           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2626             {
2627               struct algorithm *x;
2628               best_cost = alg_in->cost;
2629               x = alg_in, alg_in = best_alg, best_alg = x;
2630               best_alg->log[best_alg->ops] = 0;
2631               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2632             }
2633         }
2634       else
2635         {
2636           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2637
2638           op_cost = add_cost (speed, mode);
2639           new_limit.cost = best_cost.cost - op_cost;
2640           new_limit.latency = best_cost.latency - op_cost;
2641           synth_mult (alg_in, t - 1, &new_limit, mode);
2642
2643           alg_in->cost.cost += op_cost;
2644           alg_in->cost.latency += op_cost;
2645           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2646             {
2647               struct algorithm *x;
2648               best_cost = alg_in->cost;
2649               x = alg_in, alg_in = best_alg, best_alg = x;
2650               best_alg->log[best_alg->ops] = 0;
2651               best_alg->op[best_alg->ops] = alg_add_t_m2;
2652             }
2653         }
2654
2655       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2656          quickly with a - a * n for some appropriate constant n.  */
2657       m = exact_log2 (-orig_t + 1);
2658       if (m >= 0 && m < maxm)
2659         {
2660           op_cost = shiftsub1_cost (speed, mode, m);
2661           new_limit.cost = best_cost.cost - op_cost;
2662           new_limit.latency = best_cost.latency - op_cost;
2663           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2664                       &new_limit, mode);
2665
2666           alg_in->cost.cost += op_cost;
2667           alg_in->cost.latency += op_cost;
2668           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2669             {
2670               struct algorithm *x;
2671               best_cost = alg_in->cost;
2672               x = alg_in, alg_in = best_alg, best_alg = x;
2673               best_alg->log[best_alg->ops] = m;
2674               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2675             }
2676         }
2677
2678       if (cache_hit)
2679         goto done;
2680     }
2681
2682   /* Look for factors of t of the form
2683      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2684      If we find such a factor, we can multiply by t using an algorithm that
2685      multiplies by q, shift the result by m and add/subtract it to itself.
2686
2687      We search for large factors first and loop down, even if large factors
2688      are less probable than small; if we find a large factor we will find a
2689      good sequence quickly, and therefore be able to prune (by decreasing
2690      COST_LIMIT) the search.  */
2691
2692  do_alg_addsub_factor:
2693   for (m = floor_log2 (t - 1); m >= 2; m--)
2694     {
2695       unsigned HOST_WIDE_INT d;
2696
2697       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2698       if (t % d == 0 && t > d && m < maxm
2699           && (!cache_hit || cache_alg == alg_add_factor))
2700         {
2701           /* If the target has a cheap shift-and-add instruction use
2702              that in preference to a shift insn followed by an add insn.
2703              Assume that the shift-and-add is "atomic" with a latency
2704              equal to its cost, otherwise assume that on superscalar
2705              hardware the shift may be executed concurrently with the
2706              earlier steps in the algorithm.  */
2707           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2708           if (shiftadd_cost (speed, mode, m) < op_cost)
2709             {
2710               op_cost = shiftadd_cost (speed, mode, m);
2711               op_latency = op_cost;
2712             }
2713           else
2714             op_latency = add_cost (speed, mode);
2715
2716           new_limit.cost = best_cost.cost - op_cost;
2717           new_limit.latency = best_cost.latency - op_latency;
2718           synth_mult (alg_in, t / d, &new_limit, mode);
2719
2720           alg_in->cost.cost += op_cost;
2721           alg_in->cost.latency += op_latency;
2722           if (alg_in->cost.latency < op_cost)
2723             alg_in->cost.latency = op_cost;
2724           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2725             {
2726               struct algorithm *x;
2727               best_cost = alg_in->cost;
2728               x = alg_in, alg_in = best_alg, best_alg = x;
2729               best_alg->log[best_alg->ops] = m;
2730               best_alg->op[best_alg->ops] = alg_add_factor;
2731             }
2732           /* Other factors will have been taken care of in the recursion.  */
2733           break;
2734         }
2735
2736       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2737       if (t % d == 0 && t > d && m < maxm
2738           && (!cache_hit || cache_alg == alg_sub_factor))
2739         {
2740           /* If the target has a cheap shift-and-subtract insn use
2741              that in preference to a shift insn followed by a sub insn.
2742              Assume that the shift-and-sub is "atomic" with a latency
2743              equal to its cost, otherwise assume that on superscalar
2744              hardware the shift may be executed concurrently with the
2745              earlier steps in the algorithm.  */
2746           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2747           if (shiftsub0_cost (speed, mode, m) < op_cost)
2748             {
2749               op_cost = shiftsub0_cost (speed, mode, m);
2750               op_latency = op_cost;
2751             }
2752           else
2753             op_latency = add_cost (speed, mode);
2754
2755           new_limit.cost = best_cost.cost - op_cost;
2756           new_limit.latency = best_cost.latency - op_latency;
2757           synth_mult (alg_in, t / d, &new_limit, mode);
2758
2759           alg_in->cost.cost += op_cost;
2760           alg_in->cost.latency += op_latency;
2761           if (alg_in->cost.latency < op_cost)
2762             alg_in->cost.latency = op_cost;
2763           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2764             {
2765               struct algorithm *x;
2766               best_cost = alg_in->cost;
2767               x = alg_in, alg_in = best_alg, best_alg = x;
2768               best_alg->log[best_alg->ops] = m;
2769               best_alg->op[best_alg->ops] = alg_sub_factor;
2770             }
2771           break;
2772         }
2773     }
2774   if (cache_hit)
2775     goto done;
2776
2777   /* Try shift-and-add (load effective address) instructions,
2778      i.e. do a*3, a*5, a*9.  */
2779   if ((t & 1) != 0)
2780     {
2781     do_alg_add_t2_m:
2782       q = t - 1;
2783       q = q & -q;
2784       m = exact_log2 (q);
2785       if (m >= 0 && m < maxm)
2786         {
2787           op_cost = shiftadd_cost (speed, mode, m);
2788           new_limit.cost = best_cost.cost - op_cost;
2789           new_limit.latency = best_cost.latency - op_cost;
2790           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2791
2792           alg_in->cost.cost += op_cost;
2793           alg_in->cost.latency += op_cost;
2794           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2795             {
2796               struct algorithm *x;
2797               best_cost = alg_in->cost;
2798               x = alg_in, alg_in = best_alg, best_alg = x;
2799               best_alg->log[best_alg->ops] = m;
2800               best_alg->op[best_alg->ops] = alg_add_t2_m;
2801             }
2802         }
2803       if (cache_hit)
2804         goto done;
2805
2806     do_alg_sub_t2_m:
2807       q = t + 1;
2808       q = q & -q;
2809       m = exact_log2 (q);
2810       if (m >= 0 && m < maxm)
2811         {
2812           op_cost = shiftsub0_cost (speed, mode, m);
2813           new_limit.cost = best_cost.cost - op_cost;
2814           new_limit.latency = best_cost.latency - op_cost;
2815           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2816
2817           alg_in->cost.cost += op_cost;
2818           alg_in->cost.latency += op_cost;
2819           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2820             {
2821               struct algorithm *x;
2822               best_cost = alg_in->cost;
2823               x = alg_in, alg_in = best_alg, best_alg = x;
2824               best_alg->log[best_alg->ops] = m;
2825               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2826             }
2827         }
2828       if (cache_hit)
2829         goto done;
2830     }
2831
2832  done:
2833   /* If best_cost has not decreased, we have not found any algorithm.  */
2834   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2835     {
2836       /* We failed to find an algorithm.  Record alg_impossible for
2837          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2838          we are asked to find an algorithm for T within the same or
2839          lower COST_LIMIT, we can immediately return to the
2840          caller.  */
2841       entry_ptr->t = t;
2842       entry_ptr->mode = mode;
2843       entry_ptr->speed = speed;
2844       entry_ptr->alg = alg_impossible;
2845       entry_ptr->cost = *cost_limit;
2846       return;
2847     }
2848
2849   /* Cache the result.  */
2850   if (!cache_hit)
2851     {
2852       entry_ptr->t = t;
2853       entry_ptr->mode = mode;
2854       entry_ptr->speed = speed;
2855       entry_ptr->alg = best_alg->op[best_alg->ops];
2856       entry_ptr->cost.cost = best_cost.cost;
2857       entry_ptr->cost.latency = best_cost.latency;
2858     }
2859
2860   /* If we are getting a too long sequence for `struct algorithm'
2861      to record, make this search fail.  */
2862   if (best_alg->ops == MAX_BITS_PER_WORD)
2863     return;
2864
2865   /* Copy the algorithm from temporary space to the space at alg_out.
2866      We avoid using structure assignment because the majority of
2867      best_alg is normally undefined, and this is a critical function.  */
2868   alg_out->ops = best_alg->ops + 1;
2869   alg_out->cost = best_cost;
2870   memcpy (alg_out->op, best_alg->op,
2871           alg_out->ops * sizeof *alg_out->op);
2872   memcpy (alg_out->log, best_alg->log,
2873           alg_out->ops * sizeof *alg_out->log);
2874 }
2875 \f
2876 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2877    Try three variations:
2878
2879        - a shift/add sequence based on VAL itself
2880        - a shift/add sequence based on -VAL, followed by a negation
2881        - a shift/add sequence based on VAL - 1, followed by an addition.
2882
2883    Return true if the cheapest of these costs less than MULT_COST,
2884    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2885
2886 static bool
2887 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2888                      struct algorithm *alg, enum mult_variant *variant,
2889                      int mult_cost)
2890 {
2891   struct algorithm alg2;
2892   struct mult_cost limit;
2893   int op_cost;
2894   bool speed = optimize_insn_for_speed_p ();
2895
2896   /* Fail quickly for impossible bounds.  */
2897   if (mult_cost < 0)
2898     return false;
2899
2900   /* Ensure that mult_cost provides a reasonable upper bound.
2901      Any constant multiplication can be performed with less
2902      than 2 * bits additions.  */
2903   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2904   if (mult_cost > op_cost)
2905     mult_cost = op_cost;
2906
2907   *variant = basic_variant;
2908   limit.cost = mult_cost;
2909   limit.latency = mult_cost;
2910   synth_mult (alg, val, &limit, mode);
2911
2912   /* Try -VAL followed by a negation.  This works only if the inverted
2913      value actually fits in an `unsigned int'.  */
2914   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2915     {
2916       op_cost = neg_cost (speed, mode);  /* Cost of the final negation.  */
2917       if (MULT_COST_LESS (&alg->cost, mult_cost))
2918         {
2919           limit.cost = alg->cost.cost - op_cost;
2920           limit.latency = alg->cost.latency - op_cost;
2921         }
2922       else
2923         {
2924           limit.cost = mult_cost - op_cost;
2925           limit.latency = mult_cost - op_cost;
2926         }
2927
2928       synth_mult (&alg2, -val, &limit, mode);
2929       alg2.cost.cost += op_cost;
2930       alg2.cost.latency += op_cost;
2931       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2932         *alg = alg2, *variant = negate_variant;
2933     }
2934
2935   /* Try VAL - 1 followed by an add; very useful for division-by-constant.  */
2936   op_cost = add_cost (speed, mode);  /* Cost of the final addition.  */
2937   if (MULT_COST_LESS (&alg->cost, mult_cost))
2938     {
2939       limit.cost = alg->cost.cost - op_cost;
2940       limit.latency = alg->cost.latency - op_cost;
2941     }
2942   else
2943     {
2944       limit.cost = mult_cost - op_cost;
2945       limit.latency = mult_cost - op_cost;
2946     }
2947
2948   synth_mult (&alg2, val - 1, &limit, mode);
2949   alg2.cost.cost += op_cost;
2950   alg2.cost.latency += op_cost;
2951   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2952     *alg = alg2, *variant = add_variant;
2953
2954   return MULT_COST_LESS (&alg->cost, mult_cost);
2955 }
2956
2957 /* A subroutine of expand_mult, used for constant multiplications.
2958    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2959    convenient.  Use the shift/add sequence described by ALG and apply
2960    the final fixup specified by VARIANT.  Return the rtx of the result.  */
2961
2962 static rtx
2963 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
2964                    rtx target, const struct algorithm *alg,
2965                    enum mult_variant variant)
2966 {
2967   HOST_WIDE_INT val_so_far;  /* Multiplier realized by the insns so far.  */
2968   rtx_insn *insn;
2969   rtx accum, tem;
2970   int opno;
2971   machine_mode nmode;
2972
2973   /* Avoid referencing memory over and over and invalid sharing
2974      on SUBREGs.  */
2975   op0 = force_reg (mode, op0);
2976
2977   /* ACCUM starts out either as OP0 or as a zero, depending on
2978      the first operation.  */
2979
2980   if (alg->op[0] == alg_zero)
2981     {
2982       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2983       val_so_far = 0;
2984     }
2985   else if (alg->op[0] == alg_m)
2986     {
2987       accum = copy_to_mode_reg (mode, op0);
2988       val_so_far = 1;
2989     }
2990   else
2991     gcc_unreachable ();
2992
2993   for (opno = 1; opno < alg->ops; opno++)
2994     {
2995       int log = alg->log[opno];
2996       rtx shift_subtarget = optimize ? 0 : accum;
2997       rtx add_target
2998         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2999            && !optimize)
3000           ? target : 0;
3001       rtx accum_target = optimize ? 0 : accum;
3002       rtx accum_inner;
3003
3004       switch (alg->op[opno])
3005         {
3006         case alg_shift:
3007           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3008           /* REG_EQUAL note will be attached to the following insn.  */
3009           emit_move_insn (accum, tem);
3010           val_so_far <<= log;
3011           break;
3012
3013         case alg_add_t_m2:
3014           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3015           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3016                                  add_target ? add_target : accum_target);
3017           val_so_far += (HOST_WIDE_INT) 1 << log;
3018           break;
3019
3020         case alg_sub_t_m2:
3021           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3022           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3023                                  add_target ? add_target : accum_target);
3024           val_so_far -= (HOST_WIDE_INT) 1 << log;
3025           break;
3026
3027         case alg_add_t2_m:
3028           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3029                                 log, shift_subtarget, 0);
3030           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3031                                  add_target ? add_target : accum_target);
3032           val_so_far = (val_so_far << log) + 1;
3033           break;
3034
3035         case alg_sub_t2_m:
3036           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3037                                 log, shift_subtarget, 0);
3038           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3039                                  add_target ? add_target : accum_target);
3040           val_so_far = (val_so_far << log) - 1;
3041           break;
3042
3043         case alg_add_factor:
3044           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3045           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3046                                  add_target ? add_target : accum_target);
3047           val_so_far += val_so_far << log;
3048           break;
3049
3050         case alg_sub_factor:
3051           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3052           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3053                                  (add_target
3054                                   ? add_target : (optimize ? 0 : tem)));
3055           val_so_far = (val_so_far << log) - val_so_far;
3056           break;
3057
3058         default:
3059           gcc_unreachable ();
3060         }
3061
3062       if (SCALAR_INT_MODE_P (mode))
3063         {
3064           /* Write a REG_EQUAL note on the last insn so that we can cse
3065              multiplication sequences.  Note that if ACCUM is a SUBREG,
3066              we've set the inner register and must properly indicate that.  */
3067           tem = op0, nmode = mode;
3068           accum_inner = accum;
3069           if (GET_CODE (accum) == SUBREG)
3070             {
3071               accum_inner = SUBREG_REG (accum);
3072               nmode = GET_MODE (accum_inner);
3073               tem = gen_lowpart (nmode, op0);
3074             }
3075
3076           insn = get_last_insn ();
3077           set_dst_reg_note (insn, REG_EQUAL,
3078                             gen_rtx_MULT (nmode, tem,
3079                                           gen_int_mode (val_so_far, nmode)),
3080                             accum_inner);
3081         }
3082     }
3083
3084   if (variant == negate_variant)
3085     {
3086       val_so_far = -val_so_far;
3087       accum = expand_unop (mode, neg_optab, accum, target, 0);
3088     }
3089   else if (variant == add_variant)
3090     {
3091       val_so_far = val_so_far + 1;
3092       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3093     }
3094
3095   /* Check that the emitted sequence really multiplies by VAL.  Compare
3096      only the bits significant in the result mode, to avoid extension confusion.  */
3097   nmode = GET_MODE_INNER (mode);
3098   if (nmode == VOIDmode)
3099     nmode = mode;
3100   val &= GET_MODE_MASK (nmode);
3101   val_so_far &= GET_MODE_MASK (nmode);
3102   gcc_assert (val == val_so_far);
3103
3104   return accum;
3105 }
3106
/* Perform a multiplication and return an rtx for the result.
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
   TARGET is a suggestion for where to store the result (an rtx).
   UNSIGNEDP is forwarded to the shift and multiply expanders; when it
   is zero, MODE is a scalar integer mode and flag_trapv is set, the
   trapping optabs (negv/smulv) are used and shift/add synthesis is
   skipped.

   We check specially for a constant integer as OP1.  If OP0 satisfies
   CONSTANT_P the two operands are swapped on entry, so the constant
   checks below see the constant operand regardless of the order in
   which the caller passed them.  */

rtx
expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
             int unsignedp)
{
  enum mult_variant variant;
  struct algorithm algorithm;
  rtx scalar_op1;
  int max_cost;
  bool speed = optimize_insn_for_speed_p ();
  bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;

  /* Canonicalize so that a constant operand, if any, is OP1.  */
  if (CONSTANT_P (op0))
    std::swap (op0, op1);

  /* For vectors, there are several simplifications that can be made if
     all elements of the vector constant are identical.  SCALAR_OP1 is
     then that common element; otherwise none of the constant special
     cases below apply and we go straight to the generic multiply.  */
  scalar_op1 = op1;
  if (GET_CODE (op1) == CONST_VECTOR)
    {
      int i, n = CONST_VECTOR_NUNITS (op1);
      scalar_op1 = CONST_VECTOR_ELT (op1, 0);
      for (i = 1; i < n; ++i)
        if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
          goto skip_scalar;
    }

  if (INTEGRAL_MODE_P (mode))
    {
      rtx fake_reg;
      HOST_WIDE_INT coeff;
      bool is_neg;
      int mode_bitsize;

      /* Multiplication by 0, 1 and -1 needs no multiply at all.  */
      if (op1 == CONST0_RTX (mode))
        return op1;
      if (op1 == CONST1_RTX (mode))
        return op0;
      if (op1 == CONSTM1_RTX (mode))
        return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
                            op0, target, 0);

      /* With trapping arithmetic, do not synthesize the multiply; use
         the trapping multiply optab at the end of the function.  */
      if (do_trapv)
        goto skip_synth;

      /* If mode is integer vector mode, check if the backend supports
         vector lshift (by scalar or vector) at all.  If not, we can't use
         synthesized multiply.  */
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
          && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
          && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
        goto skip_synth;

      /* These are the operations that are potentially turned into
         a sequence of shifts and additions.  */
      mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

      /* synth_mult does an `unsigned int' multiply.  As long as the mode is
         less than or equal in size to `unsigned int' this doesn't matter.
         If the mode is larger than `unsigned int', then synth_mult works
         only if the constant value exactly fits in an `unsigned int' without
         any truncation.  This means that multiplying by negative values does
         not work; results are off by 2^32 on a 32 bit machine.  */
      if (CONST_INT_P (scalar_op1))
        {
          coeff = INTVAL (scalar_op1);
          is_neg = coeff < 0;
        }
      /* Constants too wide for a CONST_INT: only an exact power of two
         is handled (as a shift); everything else uses the generic
         multiply.  */
#if TARGET_SUPPORTS_WIDE_INT
      else if (CONST_WIDE_INT_P (scalar_op1))
#else
      else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
#endif
        {
          int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
          /* Perfect power of 2 (other than 1, which is handled above).  */
          if (shift > 0)
            return expand_shift (LSHIFT_EXPR, mode, op0,
                                 shift, target, unsignedp);
          else
            goto skip_synth;
        }
      else
        goto skip_synth;

      /* We used to test optimize here, on the grounds that it's better to
         produce a smaller program when -O is not used.  But this causes
         such a terrible slowdown sometimes that it seems better to always
         use synth_mult.  */

      /* Special case powers of two.  A negative COEFF in a mode wider
         than HOST_WIDE_INT is excluded here and handled by the
         negate-afterwards path below.  */
      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
          && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
        return expand_shift (LSHIFT_EXPR, mode, op0,
                             floor_log2 (coeff), target, unsignedp);

      /* A placeholder register used only to build a MULT rtx for costing.  */
      fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);

      /* Attempt to handle multiplication of DImode values by negative
         coefficients, by performing the multiplication by a positive
         multiplier and then inverting the result.  */
      if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
        {
          /* It's safe to use -coeff even for INT_MIN, as the
             result is interpreted as an unsigned coefficient.
             Exclude cost of op0 from max_cost to match the cost
             calculation of the synth_mult.  The cost of the final
             negation is subtracted from the budget as well.  */
          coeff = -(unsigned HOST_WIDE_INT) coeff;
          max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
                      - neg_cost (speed, mode));
          if (max_cost <= 0)
            goto skip_synth;

          /* Special case powers of two.  */
          if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
            {
              rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
                                       floor_log2 (coeff), target, unsignedp);
              return expand_unop (mode, neg_optab, temp, target, 0);
            }

          if (choose_mult_variant (mode, coeff, &algorithm, &variant,
                                   max_cost))
            {
              rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
                                            &algorithm, variant);
              return expand_unop (mode, neg_optab, temp, target, 0);
            }
          goto skip_synth;
        }

      /* Exclude cost of op0 from max_cost to match the cost
         calculation of the synth_mult.  */
      max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
      if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
        return expand_mult_const (mode, op0, coeff, target,
                                  &algorithm, variant);
    }
 skip_synth:

  /* Expand x*2.0 as x+x.  */
  if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
    {
      REAL_VALUE_TYPE d;
      REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);

      if (REAL_VALUES_EQUAL (d, dconst2))
        {
          op0 = force_reg (GET_MODE (op0), op0);
          return expand_binop (mode, add_optab, op0, op0,
                               target, unsignedp, OPTAB_LIB_WIDEN);
        }
    }
 skip_scalar:

  /* This used to use umul_optab if unsigned, but for non-widening multiply
     there is no difference between signed and unsigned.  */
  op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
                      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
  gcc_assert (op0);
  return op0;
}
3276
3277 /* Return a cost estimate for multiplying a register by the given
3278    COEFFicient in the given MODE and SPEED.  */
3279
3280 int
3281 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3282 {
3283   int max_cost;
3284   struct algorithm algorithm;
3285   enum mult_variant variant;
3286
3287   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3288   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3289   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3290     return algorithm.cost.cost;
3291   else
3292     return max_cost;
3293 }
3294
3295 /* Perform a widening multiplication and return an rtx for the result.
3296    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3297    TARGET is a suggestion for where to store the result (an rtx).
3298    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3299    or smul_widen_optab.
3300
3301    We check specially for a constant integer as OP1, comparing the
3302    cost of a widening multiply against the cost of a sequence of shifts
3303    and adds.  */
3304
3305 rtx
3306 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3307                       int unsignedp, optab this_optab)
3308 {
3309   bool speed = optimize_insn_for_speed_p ();
3310   rtx cop1;
3311
3312   if (CONST_INT_P (op1)
3313       && GET_MODE (op0) != VOIDmode
3314       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3315                                 this_optab == umul_widen_optab))
3316       && CONST_INT_P (cop1)
3317       && (INTVAL (cop1) >= 0
3318           || HWI_COMPUTABLE_MODE_P (mode)))
3319     {
3320       HOST_WIDE_INT coeff = INTVAL (cop1);
3321       int max_cost;
3322       enum mult_variant variant;
3323       struct algorithm algorithm;
3324
3325       if (coeff == 0)
3326         return CONST0_RTX (mode);
3327
3328       /* Special case powers of two.  */
3329       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3330         {
3331           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3332           return expand_shift (LSHIFT_EXPR, mode, op0,
3333                                floor_log2 (coeff), target, unsignedp);
3334         }
3335
3336       /* Exclude cost of op0 from max_cost to match the cost
3337          calculation of the synth_mult.  */
3338       max_cost = mul_widen_cost (speed, mode);
3339       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3340                                max_cost))
3341         {
3342           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3343           return expand_mult_const (mode, op0, coeff, target,
3344                                     &algorithm, variant);
3345         }
3346     }
3347   return expand_binop (mode, this_optab, op0, op1, target,
3348                        unsignedp, OPTAB_LIB_WIDEN);
3349 }
3350 \f
3351 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3352    replace division by D, and put the least significant N bits of the result
3353    in *MULTIPLIER_PTR and return the most significant bit.
3354
3355    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3356    needed precision is in PRECISION (should be <= N).
3357
3358    PRECISION should be as small as possible so this function can choose
3359    multiplier more freely.
3360
3361    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3362    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3363
3364    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3365    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3366
3367 unsigned HOST_WIDE_INT
3368 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3369                    unsigned HOST_WIDE_INT *multiplier_ptr,
3370                    int *post_shift_ptr, int *lgup_ptr)
3371 {
3372   int lgup, post_shift;
3373   int pow, pow2;
3374
3375   /* lgup = ceil(log2(divisor)); */
3376   lgup = ceil_log2 (d);
3377
3378   gcc_assert (lgup <= n);
3379
3380   pow = n + lgup;
3381   pow2 = n + lgup - precision;
3382
3383   /* mlow = 2^(N + lgup)/d */
3384   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3385   wide_int mlow = wi::udiv_trunc (val, d);
3386
3387   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3388   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3389   wide_int mhigh = wi::udiv_trunc (val, d);
3390
3391   /* If precision == N, then mlow, mhigh exceed 2^N
3392      (but they do not exceed 2^(N+1)).  */
3393
3394   /* Reduce to lowest terms.  */
3395   for (post_shift = lgup; post_shift > 0; post_shift--)
3396     {
3397       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3398                                                        HOST_BITS_PER_WIDE_INT);
3399       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3400                                                        HOST_BITS_PER_WIDE_INT);
3401       if (ml_lo >= mh_lo)
3402         break;
3403
3404       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3405       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3406     }
3407
3408   *post_shift_ptr = post_shift;
3409   *lgup_ptr = lgup;
3410   if (n < HOST_BITS_PER_WIDE_INT)
3411     {
3412       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3413       *multiplier_ptr = mhigh.to_uhwi () & mask;
3414       return mhigh.to_uhwi () >= mask;
3415     }
3416   else
3417     {
3418       *multiplier_ptr = mhigh.to_uhwi ();
3419       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3420     }
3421 }
3422
/* Return the multiplicative inverse of X modulo 2**N, i.e. the Y for
   which X * Y is congruent to 1 (mod 2**N).  X is assumed to be odd.

   Any odd X is its own inverse modulo 2**3, so Y starts out as X with
   three correct bits.  Each refinement step Y <- Y * (2 - X * Y)
   doubles the number of correct low-order bits, and the loop runs
   until at least N bits are correct.  If N <= 3 no step is taken and X
   is returned as is (unmasked).  */

static unsigned HOST_WIDE_INT
invert_mod2n (unsigned HOST_WIDE_INT x, int n)
{
  unsigned HOST_WIDE_INT low_n_bits;
  unsigned HOST_WIDE_INT inv = x;
  int good_bits;

  /* Shifting by the full width of the type is not allowed, so the
     all-ones mask is special-cased.  */
  if (n == HOST_BITS_PER_WIDE_INT)
    low_n_bits = ~(unsigned HOST_WIDE_INT) 0;
  else
    low_n_bits = ((unsigned HOST_WIDE_INT) 1 << n) - 1;

  for (good_bits = 3; good_bits < n; good_bits *= 2)
    inv = (inv * (2 - x * inv)) & low_n_bits;   /* Modulo 2^N */

  return inv;
}
3450
3451 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3452    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3453    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3454    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3455    become signed.
3456
3457    The result is put in TARGET if that is convenient.
3458
3459    MODE is the mode of operation.  */
3460
3461 rtx
3462 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3463                              rtx op1, rtx target, int unsignedp)
3464 {
3465   rtx tem;
3466   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3467
3468   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3469                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3470   tem = expand_and (mode, tem, op1, NULL_RTX);
3471   adj_operand
3472     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3473                      adj_operand);
3474
3475   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3476                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3477   tem = expand_and (mode, tem, op0, NULL_RTX);
3478   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3479                           target);
3480
3481   return target;
3482 }
3483
3484 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3485
3486 static rtx
3487 extract_high_half (machine_mode mode, rtx op)
3488 {
3489   machine_mode wider_mode;
3490
3491   if (mode == word_mode)
3492     return gen_highpart (mode, op);
3493
3494   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3495
3496   wider_mode = GET_MODE_WIDER_MODE (mode);
3497   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3498                      GET_MODE_BITSIZE (mode), 0, 1);
3499   return convert_modes (mode, wider_mode, op, 0);
3500 }
3501
3502 /* Like expmed_mult_highpart, but only consider using a multiplication
3503    optab.  OP1 is an rtx for the constant operand.  */
3504
3505 static rtx
3506 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3507                             rtx target, int unsignedp, int max_cost)
3508 {
3509   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3510   machine_mode wider_mode;
3511   optab moptab;
3512   rtx tem;
3513   int size;
3514   bool speed = optimize_insn_for_speed_p ();
3515
3516   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3517
3518   wider_mode = GET_MODE_WIDER_MODE (mode);
3519   size = GET_MODE_BITSIZE (mode);
3520
3521   /* Firstly, try using a multiplication insn that only generates the needed
3522      high part of the product, and in the sign flavor of unsignedp.  */
3523   if (mul_highpart_cost (speed, mode) < max_cost)
3524     {
3525       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3526       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3527                           unsignedp, OPTAB_DIRECT);
3528       if (tem)
3529         return tem;
3530     }
3531
3532   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3533      Need to adjust the result after the multiplication.  */
3534   if (size - 1 < BITS_PER_WORD
3535       && (mul_highpart_cost (speed, mode)
3536           + 2 * shift_cost (speed, mode, size-1)
3537           + 4 * add_cost (speed, mode) < max_cost))
3538     {
3539       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3540       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3541                           unsignedp, OPTAB_DIRECT);
3542       if (tem)
3543         /* We used the wrong signedness.  Adjust the result.  */
3544         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3545                                             tem, unsignedp);
3546     }
3547
3548   /* Try widening multiplication.  */
3549   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3550   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3551       && mul_widen_cost (speed, wider_mode) < max_cost)
3552     {
3553       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3554                           unsignedp, OPTAB_WIDEN);
3555       if (tem)
3556         return extract_high_half (mode, tem);
3557     }
3558
3559   /* Try widening the mode and perform a non-widening multiplication.  */
3560   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3561       && size - 1 < BITS_PER_WORD
3562       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3563           < max_cost))
3564     {
3565       rtx_insn *insns;
3566       rtx wop0, wop1;
3567
3568       /* We need to widen the operands, for example to ensure the
3569          constant multiplier is correctly sign or zero extended.
3570          Use a sequence to clean-up any instructions emitted by
3571          the conversions if things don't work out.  */
3572       start_sequence ();
3573       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3574       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3575       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3576                           unsignedp, OPTAB_WIDEN);
3577       insns = get_insns ();
3578       end_sequence ();
3579
3580       if (tem)
3581         {
3582           emit_insn (insns);
3583           return extract_high_half (mode, tem);
3584         }
3585     }
3586
3587   /* Try widening multiplication of opposite signedness, and adjust.  */
3588   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3589   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3590       && size - 1 < BITS_PER_WORD
3591       && (mul_widen_cost (speed, wider_mode)
3592           + 2 * shift_cost (speed, mode, size-1)
3593           + 4 * add_cost (speed, mode) < max_cost))
3594     {
3595       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3596                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3597       if (tem != 0)
3598         {
3599           tem = extract_high_half (mode, tem);
3600           /* We used the wrong signedness.  Adjust the result.  */
3601           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3602                                               target, unsignedp);
3603         }
3604     }
3605
3606   return 0;
3607 }
3608
3609 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3610    putting the high half of the result in TARGET if that is convenient,
3611    and return where the result is.  If the operation can not be performed,
3612    0 is returned.
3613
3614    MODE is the mode of operation and result.
3615
3616    UNSIGNEDP nonzero means unsigned multiply.
3617
3618    MAX_COST is the total allowed cost for the expanded RTL.  */
3619
3620 static rtx
3621 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3622                       rtx target, int unsignedp, int max_cost)
3623 {
3624   machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3625   unsigned HOST_WIDE_INT cnst1;
3626   int extra_cost;
3627   bool sign_adjust = false;
3628   enum mult_variant variant;
3629   struct algorithm alg;
3630   rtx tem;
3631   bool speed = optimize_insn_for_speed_p ();
3632
3633   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3634   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3635   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3636
3637   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3638
3639   /* We can't optimize modes wider than BITS_PER_WORD.
3640      ??? We might be able to perform double-word arithmetic if
3641      mode == word_mode, however all the cost calculations in
3642      synth_mult etc. assume single-word operations.  */
3643   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3644     return expmed_mult_highpart_optab (mode, op0, op1, target,
3645                                        unsignedp, max_cost);
3646
3647   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3648
3649   /* Check whether we try to multiply by a negative constant.  */
3650   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3651     {
3652       sign_adjust = true;
3653       extra_cost += add_cost (speed, mode);
3654     }
3655
3656   /* See whether shift/add multiplication is cheap enough.  */
3657   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3658                            max_cost - extra_cost))
3659     {
3660       /* See whether the specialized multiplication optabs are
3661          cheaper than the shift/add version.  */
3662       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3663                                         alg.cost.cost + extra_cost);
3664       if (tem)
3665         return tem;
3666
3667       tem = convert_to_mode (wider_mode, op0, unsignedp);
3668       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3669       tem = extract_high_half (mode, tem);
3670
3671       /* Adjust result for signedness.  */
3672       if (sign_adjust)
3673         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3674
3675       return tem;
3676     }
3677   return expmed_mult_highpart_optab (mode, op0, op1, target,
3678                                      unsignedp, max_cost);
3679 }
3680
3681
3682 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3683
3684 static rtx
3685 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3686 {
3687   rtx result, temp, shift;
3688   rtx_code_label *label;
3689   int logd;
3690   int prec = GET_MODE_PRECISION (mode);
3691
3692   logd = floor_log2 (d);
3693   result = gen_reg_rtx (mode);
3694
3695   /* Avoid conditional branches when they're expensive.  */
3696   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3697       && optimize_insn_for_speed_p ())
3698     {
3699       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3700                                       mode, 0, -1);
3701       if (signmask)
3702         {
3703           HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3704           signmask = force_reg (mode, signmask);
3705           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3706
3707           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3708              which instruction sequence to use.  If logical right shifts
3709              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3710              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3711
3712           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3713           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3714               || (set_src_cost (temp, optimize_insn_for_speed_p ())
3715                   > COSTS_N_INSNS (2)))
3716             {
3717               temp = expand_binop (mode, xor_optab, op0, signmask,
3718                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3719               temp = expand_binop (mode, sub_optab, temp, signmask,
3720                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3721               temp = expand_binop (mode, and_optab, temp,
3722                                    gen_int_mode (masklow, mode),
3723                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3724               temp = expand_binop (mode, xor_optab, temp, signmask,
3725                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3726               temp = expand_binop (mode, sub_optab, temp, signmask,
3727                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3728             }
3729           else
3730             {
3731               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3732                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3733               signmask = force_reg (mode, signmask);
3734
3735               temp = expand_binop (mode, add_optab, op0, signmask,
3736                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3737               temp = expand_binop (mode, and_optab, temp,
3738                                    gen_int_mode (masklow, mode),
3739                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3740               temp = expand_binop (mode, sub_optab, temp, signmask,
3741                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3742             }
3743           return temp;
3744         }
3745     }
3746
3747   /* Mask contains the mode's signbit and the significant bits of the
3748      modulus.  By including the signbit in the operation, many targets
3749      can avoid an explicit compare operation in the following comparison
3750      against zero.  */
3751   wide_int mask = wi::mask (logd, false, prec);
3752   mask = wi::set_bit (mask, prec - 1);
3753
3754   temp = expand_binop (mode, and_optab, op0,
3755                        immed_wide_int_const (mask, mode),
3756                        result, 1, OPTAB_LIB_WIDEN);
3757   if (temp != result)
3758     emit_move_insn (result, temp);
3759
3760   label = gen_label_rtx ();
3761   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3762
3763   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3764                        0, OPTAB_LIB_WIDEN);
3765
3766   mask = wi::mask (logd, true, prec);
3767   temp = expand_binop (mode, ior_optab, temp,
3768                        immed_wide_int_const (mask, mode),
3769                        result, 1, OPTAB_LIB_WIDEN);
3770   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3771                        0, OPTAB_LIB_WIDEN);
3772   if (temp != result)
3773     emit_move_insn (result, temp);
3774   emit_label (label);
3775   return result;
3776 }
3777
3778 /* Expand signed division of OP0 by a power of two D in mode MODE.
3779    This routine is only called for positive values of D.  */
3780
3781 static rtx
3782 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3783 {
3784   rtx temp;
3785   rtx_code_label *label;
3786   int logd;
3787
3788   logd = floor_log2 (d);
3789
3790   if (d == 2
3791       && BRANCH_COST (optimize_insn_for_speed_p (),
3792                       false) >= 1)
3793     {
3794       temp = gen_reg_rtx (mode);
3795       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3796       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3797                            0, OPTAB_LIB_WIDEN);
3798       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3799     }
3800
3801 #ifdef HAVE_conditional_move
3802   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3803       >= 2)
3804     {
3805       rtx temp2;
3806
3807       start_sequence ();
3808       temp2 = copy_to_mode_reg (mode, op0);
3809       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3810                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3811       temp = force_reg (mode, temp);
3812
3813       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3814       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3815                                      mode, temp, temp2, mode, 0);
3816       if (temp2)
3817         {
3818           rtx_insn *seq = get_insns ();
3819           end_sequence ();
3820           emit_insn (seq);
3821           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3822         }
3823       end_sequence ();
3824     }
3825 #endif
3826
3827   if (BRANCH_COST (optimize_insn_for_speed_p (),
3828                    false) >= 2)
3829     {
3830       int ushift = GET_MODE_BITSIZE (mode) - logd;
3831
3832       temp = gen_reg_rtx (mode);
3833       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3834       if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3835           || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3836              > COSTS_N_INSNS (1))
3837         temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3838                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3839       else
3840         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3841                              ushift, NULL_RTX, 1);
3842       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3843                            0, OPTAB_LIB_WIDEN);
3844       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3845     }
3846
3847   label = gen_label_rtx ();
3848   temp = copy_to_mode_reg (mode, op0);
3849   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3850   expand_inc (temp, gen_int_mode (d - 1, mode));
3851   emit_label (label);
3852   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3853 }
3854 \f
3855 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3856    if that is convenient, and returning where the result is.
3857    You may request either the quotient or the remainder as the result;
3858    specify REM_FLAG nonzero to get the remainder.
3859
3860    CODE is the expression code for which kind of division this is;
3861    it controls how rounding is done.  MODE is the machine mode to use.
3862    UNSIGNEDP nonzero means do unsigned division.  */
3863
3864 /* ??? For CEIL_MOD_EXPR with a power-of-2 divisor, we could compute a
3865    preliminary (incorrect) remainder with ANDI and then correct it by
3866    or'ing in the missing high bits if the result of the ANDI is nonzero.
3867    For ROUND_MOD_EXPR, we could use ANDI and then sign-extend the result;
3868    this could optimize to a bfexts instruction.
3869    But C doesn't use these operations, so these optimizations are
3870    left for later.  */
3871 /* ??? For modulo, we don't actually need the highpart of the first product,
3872    the low part will do nicely.  And for small divisors, the second multiply
3873    can also be a low-part only multiply or even be completely left out.
3874    E.g. to calculate the remainder of a division by 3 with a 32 bit
3875    multiply, multiply with 0x55555556 and extract the upper two bits;
3876    the result is exact for inputs up to 0x1fffffff.
3877    The input range can be reduced by using cross-sum rules.
3878    For odd divisors >= 3, the following table gives right shift counts
3879    so that if a number is shifted by an integer multiple of the given
3880    amount, the remainder stays the same:
3881    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3882    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3883    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3884    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3885    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3886
3887    Cross-sum rules for even numbers can be derived by leaving as many bits
3888    to the right alone as the divisor has zeros to the right.
3889    E.g. if x is an unsigned 32 bit number:
3890    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3891    */
3892
3893 rtx
3894 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3895                rtx op0, rtx op1, rtx target, int unsignedp)
3896 {
3897   machine_mode compute_mode;
3898   rtx tquotient;
3899   rtx quotient = 0, remainder = 0;
3900   rtx_insn *last;
3901   int size;
3902   rtx_insn *insn;
3903   optab optab1, optab2;
3904   int op1_is_constant, op1_is_pow2 = 0;
3905   int max_cost, extra_cost;
3906   static HOST_WIDE_INT last_div_const = 0;
3907   bool speed = optimize_insn_for_speed_p ();
3908
3909   op1_is_constant = CONST_INT_P (op1);
3910   if (op1_is_constant)
3911     {
3912       unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3913       if (unsignedp)
3914         ext_op1 &= GET_MODE_MASK (mode);
3915       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3916                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3917     }
3918
3919   /*
3920      This is the structure of expand_divmod:
3921
3922      First comes code to fix up the operands so we can perform the operations
3923      correctly and efficiently.
3924
3925      Second comes a switch statement with code specific for each rounding mode.
3926      For some special operands this code emits all RTL for the desired
3927      operation, for other cases, it generates only a quotient and stores it in
3928      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3929      to indicate that it has not done anything.
3930
3931      Last comes code that finishes the operation.  If QUOTIENT is set and
3932      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3933      QUOTIENT is not set, it is computed using trunc rounding.
3934
3935      We try to generate special code for division and remainder when OP1 is a
3936      constant.  If |OP1| = 2**n we can use shifts and some other fast
3937      operations.  For other values of OP1, we compute a carefully selected
3938      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3939      by m.
3940
3941      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3942      half of the product.  Different strategies for generating the product are
3943      implemented in expmed_mult_highpart.
3944
3945      If what we actually want is the remainder, we generate that by another
3946      by-constant multiplication and a subtraction.  */
3947
3948   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3949      code below will malfunction if we are, so check here and handle
3950      the special case if so.  */
3951   if (op1 == const1_rtx)
3952     return rem_flag ? const0_rtx : op0;
3953
3954     /* When dividing by -1, we could get an overflow.
3955      negv_optab can handle overflows.  */
3956   if (! unsignedp && op1 == constm1_rtx)
3957     {
3958       if (rem_flag)
3959         return const0_rtx;
3960       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3961                           ? negv_optab : neg_optab, op0, target, 0);
3962     }
3963
3964   if (target
3965       /* Don't use the function value register as a target
3966          since we have to read it as well as write it,
3967          and function-inlining gets confused by this.  */
3968       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3969           /* Don't clobber an operand while doing a multi-step calculation.  */
3970           || ((rem_flag || op1_is_constant)
3971               && (reg_mentioned_p (target, op0)
3972                   || (MEM_P (op0) && MEM_P (target))))
3973           || reg_mentioned_p (target, op1)
3974           || (MEM_P (op1) && MEM_P (target))))
3975     target = 0;
3976
3977   /* Get the mode in which to perform this computation.  Normally it will
3978      be MODE, but sometimes we can't do the desired operation in MODE.
3979      If so, pick a wider mode in which we can do the operation.  Convert
3980      to that mode at the start to avoid repeated conversions.
3981
3982      First see what operations we need.  These depend on the expression
3983      we are evaluating.  (We assume that divxx3 insns exist under the
3984      same conditions that modxx3 insns and that these insns don't normally
3985      fail.  If these assumptions are not correct, we may generate less
3986      efficient code in some cases.)
3987
3988      Then see if we find a mode in which we can open-code that operation
3989      (either a division, modulus, or shift).  Finally, check for the smallest
3990      mode for which we can do the operation with a library call.  */
3991
3992   /* We might want to refine this now that we have division-by-constant
3993      optimization.  Since expmed_mult_highpart tries so many variants, it is
3994      not straightforward to generalize this.  Maybe we should make an array
3995      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3996
3997   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3998             ? (unsignedp ? lshr_optab : ashr_optab)
3999             : (unsignedp ? udiv_optab : sdiv_optab));
4000   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
4001             ? optab1
4002             : (unsignedp ? udivmod_optab : sdivmod_optab));
4003
4004   for (compute_mode = mode; compute_mode != VOIDmode;
4005        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4006     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4007         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4008       break;
4009
4010   if (compute_mode == VOIDmode)
4011     for (compute_mode = mode; compute_mode != VOIDmode;
4012          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4013       if (optab_libfunc (optab1, compute_mode)
4014           || optab_libfunc (optab2, compute_mode))
4015         break;
4016
4017   /* If we still couldn't find a mode, use MODE, but expand_binop will
4018      probably die.  */
4019   if (compute_mode == VOIDmode)
4020     compute_mode = mode;
4021
4022   if (target && GET_MODE (target) == compute_mode)
4023     tquotient = target;
4024   else
4025     tquotient = gen_reg_rtx (compute_mode);
4026
4027   size = GET_MODE_BITSIZE (compute_mode);
4028 #if 0
4029   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4030      (mode), and thereby get better code when OP1 is a constant.  Do that
4031      later.  It will require going over all usages of SIZE below.  */
4032   size = GET_MODE_BITSIZE (mode);
4033 #endif
4034
4035   /* Only deduct something for a REM if the last divide done was
4036      for a different constant.   Then set the constant of the last
4037      divide.  */
4038   max_cost = (unsignedp
4039               ? udiv_cost (speed, compute_mode)
4040               : sdiv_cost (speed, compute_mode));
4041   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4042                      && INTVAL (op1) == last_div_const))
4043     max_cost -= (mul_cost (speed, compute_mode)
4044                  + add_cost (speed, compute_mode));
4045
4046   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4047
4048   /* Now convert to the best mode to use.  */
4049   if (compute_mode != mode)
4050     {
4051       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4052       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4053
4054       /* convert_modes may have placed op1 into a register, so we
4055          must recompute the following.  */
4056       op1_is_constant = CONST_INT_P (op1);
4057       op1_is_pow2 = (op1_is_constant
4058                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4059                           || (! unsignedp
4060                               && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4061     }
4062
4063   /* If one of the operands is a volatile MEM, copy it into a register.  */
4064
4065   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4066     op0 = force_reg (compute_mode, op0);
4067   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4068     op1 = force_reg (compute_mode, op1);
4069
4070   /* If we need the remainder or if OP1 is constant, we need to
4071      put OP0 in a register in case it has any queued subexpressions.  */
4072   if (rem_flag || op1_is_constant)
4073     op0 = force_reg (compute_mode, op0);
4074
4075   last = get_last_insn ();
4076
4077   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4078   if (unsignedp)
4079     {
4080       if (code == FLOOR_DIV_EXPR)
4081         code = TRUNC_DIV_EXPR;
4082       if (code == FLOOR_MOD_EXPR)
4083         code = TRUNC_MOD_EXPR;
4084       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4085         code = TRUNC_DIV_EXPR;
4086     }
4087
4088   if (op1 != const0_rtx)
4089     switch (code)
4090       {
4091       case TRUNC_MOD_EXPR:
4092       case TRUNC_DIV_EXPR:
4093         if (op1_is_constant)
4094           {
4095             if (unsignedp)
4096               {
4097                 unsigned HOST_WIDE_INT mh, ml;
4098                 int pre_shift, post_shift;
4099                 int dummy;
4100                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4101                                             & GET_MODE_MASK (compute_mode));
4102
4103                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4104                   {
4105                     pre_shift = floor_log2 (d);
4106                     if (rem_flag)
4107                       {
4108                         unsigned HOST_WIDE_INT mask
4109                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4110                         remainder
4111                           = expand_binop (compute_mode, and_optab, op0,
4112                                           gen_int_mode (mask, compute_mode),
4113                                           remainder, 1,
4114                                           OPTAB_LIB_WIDEN);
4115                         if (remainder)
4116                           return gen_lowpart (mode, remainder);
4117                       }
4118                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4119                                              pre_shift, tquotient, 1);
4120                   }
4121                 else if (size <= HOST_BITS_PER_WIDE_INT)
4122                   {
4123                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4124                       {
4125                         /* Most significant bit of divisor is set; emit an scc
4126                            insn.  */
4127                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4128                                                           compute_mode, 1, 1);
4129                       }
4130                     else
4131                       {
4132                         /* Find a suitable multiplier and right shift count
4133                            instead of multiplying with D.  */
4134
4135                         mh = choose_multiplier (d, size, size,
4136                                                 &ml, &post_shift, &dummy);
4137
4138                         /* If the suggested multiplier is more than SIZE bits,
4139                            we can do better for even divisors, using an
4140                            initial right shift.  */
4141                         if (mh != 0 && (d & 1) == 0)
4142                           {
4143                             pre_shift = floor_log2 (d & -d);
4144                             mh = choose_multiplier (d >> pre_shift, size,
4145                                                     size - pre_shift,
4146                                                     &ml, &post_shift, &dummy);
4147                             gcc_assert (!mh);
4148                           }
4149                         else
4150                           pre_shift = 0;
4151
4152                         if (mh != 0)
4153                           {
4154                             rtx t1, t2, t3, t4;
4155
4156                             if (post_shift - 1 >= BITS_PER_WORD)
4157                               goto fail1;
4158
4159                             extra_cost
4160                               = (shift_cost (speed, compute_mode, post_shift - 1)
4161                                  + shift_cost (speed, compute_mode, 1)
4162                                  + 2 * add_cost (speed, compute_mode));
4163                             t1 = expmed_mult_highpart
4164                               (compute_mode, op0,
4165                                gen_int_mode (ml, compute_mode),
4166                                NULL_RTX, 1, max_cost - extra_cost);
4167                             if (t1 == 0)
4168                               goto fail1;
4169                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4170                                                                op0, t1),
4171                                                 NULL_RTX);
4172                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4173                                                t2, 1, NULL_RTX, 1);
4174                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4175                                                               t1, t3),
4176                                                 NULL_RTX);
4177                             quotient = expand_shift
4178                               (RSHIFT_EXPR, compute_mode, t4,
4179                                post_shift - 1, tquotient, 1);
4180                           }
4181                         else
4182                           {
4183                             rtx t1, t2;
4184
4185                             if (pre_shift >= BITS_PER_WORD
4186                                 || post_shift >= BITS_PER_WORD)
4187                               goto fail1;
4188
4189                             t1 = expand_shift
4190                               (RSHIFT_EXPR, compute_mode, op0,
4191                                pre_shift, NULL_RTX, 1);
4192                             extra_cost
4193                               = (shift_cost (speed, compute_mode, pre_shift)
4194                                  + shift_cost (speed, compute_mode, post_shift));
4195                             t2 = expmed_mult_highpart
4196                               (compute_mode, t1,
4197                                gen_int_mode (ml, compute_mode),
4198                                NULL_RTX, 1, max_cost - extra_cost);
4199                             if (t2 == 0)
4200                               goto fail1;
4201                             quotient = expand_shift
4202                               (RSHIFT_EXPR, compute_mode, t2,
4203                                post_shift, tquotient, 1);
4204                           }
4205                       }
4206                   }
4207                 else            /* Too wide mode to use tricky code */
4208                   break;
4209
4210                 insn = get_last_insn ();
4211                 if (insn != last)
4212                   set_dst_reg_note (insn, REG_EQUAL,
4213                                     gen_rtx_UDIV (compute_mode, op0, op1),
4214                                     quotient);
4215               }
4216             else                /* TRUNC_DIV, signed */
4217               {
4218                 unsigned HOST_WIDE_INT ml;
4219                 int lgup, post_shift;
4220                 rtx mlr;
4221                 HOST_WIDE_INT d = INTVAL (op1);
4222                 unsigned HOST_WIDE_INT abs_d;
4223
4224                 /* Since d might be INT_MIN, we have to cast to
4225                    unsigned HOST_WIDE_INT before negating to avoid
4226                    undefined signed overflow.  */
4227                 abs_d = (d >= 0
4228                          ? (unsigned HOST_WIDE_INT) d
4229                          : - (unsigned HOST_WIDE_INT) d);
4230
4231                 /* n rem d = n rem -d */
4232                 if (rem_flag && d < 0)
4233                   {
4234                     d = abs_d;
4235                     op1 = gen_int_mode (abs_d, compute_mode);
4236                   }
4237
4238                 if (d == 1)
4239                   quotient = op0;
4240                 else if (d == -1)
4241                   quotient = expand_unop (compute_mode, neg_optab, op0,
4242                                           tquotient, 0);
4243                 else if (HOST_BITS_PER_WIDE_INT >= size
4244                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4245                   {
4246                     /* This case is not handled correctly below.  */
4247                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4248                                                 compute_mode, 1, 1);
4249                     if (quotient == 0)
4250                       goto fail1;
4251                   }
4252                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4253                          && (rem_flag
4254                              ? smod_pow2_cheap (speed, compute_mode)
4255                              : sdiv_pow2_cheap (speed, compute_mode))
4256                          /* We assume that cheap metric is true if the
4257                             optab has an expander for this mode.  */
4258                          && ((optab_handler ((rem_flag ? smod_optab
4259                                               : sdiv_optab),
4260                                              compute_mode)
4261                               != CODE_FOR_nothing)
4262                              || (optab_handler (sdivmod_optab,
4263                                                 compute_mode)
4264                                  != CODE_FOR_nothing)))
4265                   ;
4266                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4267                   {
4268                     if (rem_flag)
4269                       {
4270                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4271                         if (remainder)
4272                           return gen_lowpart (mode, remainder);
4273                       }
4274
4275                     if (sdiv_pow2_cheap (speed, compute_mode)
4276                         && ((optab_handler (sdiv_optab, compute_mode)
4277                              != CODE_FOR_nothing)
4278                             || (optab_handler (sdivmod_optab, compute_mode)
4279                                 != CODE_FOR_nothing)))
4280                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4281                                                 compute_mode, op0,
4282                                                 gen_int_mode (abs_d,
4283                                                               compute_mode),
4284                                                 NULL_RTX, 0);
4285                     else
4286                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4287
4288                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4289                        negate the quotient.  */
4290                     if (d < 0)
4291                       {
4292                         insn = get_last_insn ();
4293                         if (insn != last
4294                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4295                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4296                           set_dst_reg_note (insn, REG_EQUAL,
4297                                             gen_rtx_DIV (compute_mode, op0,
4298                                                          gen_int_mode
4299                                                            (abs_d,
4300                                                             compute_mode)),
4301                                             quotient);
4302
4303                         quotient = expand_unop (compute_mode, neg_optab,
4304                                                 quotient, quotient, 0);
4305                       }
4306                   }
4307                 else if (size <= HOST_BITS_PER_WIDE_INT)
4308                   {
4309                     choose_multiplier (abs_d, size, size - 1,
4310                                        &ml, &post_shift, &lgup);
4311                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4312                       {
4313                         rtx t1, t2, t3;
4314
4315                         if (post_shift >= BITS_PER_WORD
4316                             || size - 1 >= BITS_PER_WORD)
4317                           goto fail1;
4318
4319                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4320                                       + shift_cost (speed, compute_mode, size - 1)
4321                                       + add_cost (speed, compute_mode));
4322                         t1 = expmed_mult_highpart
4323                           (compute_mode, op0, gen_int_mode (ml, compute_mode),
4324                            NULL_RTX, 0, max_cost - extra_cost);
4325                         if (t1 == 0)
4326                           goto fail1;
4327                         t2 = expand_shift
4328                           (RSHIFT_EXPR, compute_mode, t1,
4329                            post_shift, NULL_RTX, 0);
4330                         t3 = expand_shift
4331                           (RSHIFT_EXPR, compute_mode, op0,
4332                            size - 1, NULL_RTX, 0);
4333                         if (d < 0)
4334                           quotient
4335                             = force_operand (gen_rtx_MINUS (compute_mode,
4336                                                             t3, t2),
4337                                              tquotient);
4338                         else
4339                           quotient
4340                             = force_operand (gen_rtx_MINUS (compute_mode,
4341                                                             t2, t3),
4342                                              tquotient);
4343                       }
4344                     else
4345                       {
4346                         rtx t1, t2, t3, t4;
4347
4348                         if (post_shift >= BITS_PER_WORD
4349                             || size - 1 >= BITS_PER_WORD)
4350                           goto fail1;
4351
4352                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4353                         mlr = gen_int_mode (ml, compute_mode);
4354                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4355                                       + shift_cost (speed, compute_mode, size - 1)
4356                                       + 2 * add_cost (speed, compute_mode));
4357                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4358                                                    NULL_RTX, 0,
4359                                                    max_cost - extra_cost);
4360                         if (t1 == 0)
4361                           goto fail1;
4362                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4363                                                           t1, op0),
4364                                             NULL_RTX);
4365                         t3 = expand_shift
4366                           (RSHIFT_EXPR, compute_mode, t2,
4367                            post_shift, NULL_RTX, 0);
4368                         t4 = expand_shift
4369                           (RSHIFT_EXPR, compute_mode, op0,
4370                            size - 1, NULL_RTX, 0);
4371                         if (d < 0)
4372                           quotient
4373                             = force_operand (gen_rtx_MINUS (compute_mode,
4374                                                             t4, t3),
4375                                              tquotient);
4376                         else
4377                           quotient
4378                             = force_operand (gen_rtx_MINUS (compute_mode,
4379                                                             t3, t4),
4380                                              tquotient);
4381                       }
4382                   }
4383                 else            /* Too wide mode to use tricky code */
4384                   break;
4385
4386                 insn = get_last_insn ();
4387                 if (insn != last)
4388                   set_dst_reg_note (insn, REG_EQUAL,
4389                                     gen_rtx_DIV (compute_mode, op0, op1),
4390                                     quotient);
4391               }
4392             break;
4393           }
4394       fail1:
4395         delete_insns_since (last);
4396         break;
4397
4398       case FLOOR_DIV_EXPR:
4399       case FLOOR_MOD_EXPR:
4400       /* We will come here only for signed operations.  */
4401         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4402           {
4403             unsigned HOST_WIDE_INT mh, ml;
4404             int pre_shift, lgup, post_shift;
4405             HOST_WIDE_INT d = INTVAL (op1);
4406
4407             if (d > 0)
4408               {
4409                 /* We could just as easily deal with negative constants here,
4410                    but it does not seem worth the trouble for GCC 2.6.  */
4411                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4412                   {
4413                     pre_shift = floor_log2 (d);
4414                     if (rem_flag)
4415                       {
4416                         unsigned HOST_WIDE_INT mask
4417                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4418                         remainder = expand_binop
4419                           (compute_mode, and_optab, op0,
4420                            gen_int_mode (mask, compute_mode),
4421                            remainder, 0, OPTAB_LIB_WIDEN);
4422                         if (remainder)
4423                           return gen_lowpart (mode, remainder);
4424                       }
4425                     quotient = expand_shift
4426                       (RSHIFT_EXPR, compute_mode, op0,
4427                        pre_shift, tquotient, 0);
4428                   }
4429                 else
4430                   {
4431                     rtx t1, t2, t3, t4;
4432
4433                     mh = choose_multiplier (d, size, size - 1,
4434                                             &ml, &post_shift, &lgup);
4435                     gcc_assert (!mh);
4436
4437                     if (post_shift < BITS_PER_WORD
4438                         && size - 1 < BITS_PER_WORD)
4439                       {
4440                         t1 = expand_shift
4441                           (RSHIFT_EXPR, compute_mode, op0,
4442                            size - 1, NULL_RTX, 0);
4443                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4444                                            NULL_RTX, 0, OPTAB_WIDEN);
4445                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4446                                       + shift_cost (speed, compute_mode, size - 1)
4447                                       + 2 * add_cost (speed, compute_mode));
4448                         t3 = expmed_mult_highpart
4449                           (compute_mode, t2, gen_int_mode (ml, compute_mode),
4450                            NULL_RTX, 1, max_cost - extra_cost);
4451                         if (t3 != 0)
4452                           {
4453                             t4 = expand_shift
4454                               (RSHIFT_EXPR, compute_mode, t3,
4455                                post_shift, NULL_RTX, 1);
4456                             quotient = expand_binop (compute_mode, xor_optab,
4457                                                      t4, t1, tquotient, 0,
4458                                                      OPTAB_WIDEN);
4459                           }
4460                       }
4461                   }
4462               }
4463             else
4464               {
4465                 rtx nsign, t1, t2, t3, t4;
4466                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4467                                                   op0, constm1_rtx), NULL_RTX);
4468                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4469                                    0, OPTAB_WIDEN);
4470                 nsign = expand_shift
4471                   (RSHIFT_EXPR, compute_mode, t2,
4472                    size - 1, NULL_RTX, 0);
4473                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4474                                     NULL_RTX);
4475                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4476                                     NULL_RTX, 0);
4477                 if (t4)
4478                   {
4479                     rtx t5;
4480                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4481                                       NULL_RTX, 0);
4482                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4483                                                             t4, t5),
4484                                               tquotient);
4485                   }
4486               }
4487           }
4488
4489         if (quotient != 0)
4490           break;
4491         delete_insns_since (last);
4492
4493         /* Try using an instruction that produces both the quotient and
4494            remainder, using truncation.  We can easily compensate the quotient
4495            or remainder to get floor rounding, once we have the remainder.
4496            Notice that we compute also the final remainder value here,
4497            and return the result right away.  */
4498         if (target == 0 || GET_MODE (target) != compute_mode)
4499           target = gen_reg_rtx (compute_mode);
4500
4501         if (rem_flag)
4502           {
4503             remainder
4504               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4505             quotient = gen_reg_rtx (compute_mode);
4506           }
4507         else
4508           {
4509             quotient
4510               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4511             remainder = gen_reg_rtx (compute_mode);
4512           }
4513
4514         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4515                                  quotient, remainder, 0))
4516           {
4517             /* This could be computed with a branch-less sequence.
4518                Save that for later.  */
4519             rtx tem;
4520             rtx_code_label *label = gen_label_rtx ();
4521             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4522             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4523                                 NULL_RTX, 0, OPTAB_WIDEN);
4524             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4525             expand_dec (quotient, const1_rtx);
4526             expand_inc (remainder, op1);
4527             emit_label (label);
4528             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4529           }
4530
4531         /* No luck with division elimination or divmod.  Have to do it
4532            by conditionally adjusting op0 *and* the result.  */
4533         {
4534           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4535           rtx adjusted_op0;
4536           rtx tem;
4537
4538           quotient = gen_reg_rtx (compute_mode);
4539           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4540           label1 = gen_label_rtx ();
4541           label2 = gen_label_rtx ();
4542           label3 = gen_label_rtx ();
4543           label4 = gen_label_rtx ();
4544           label5 = gen_label_rtx ();
4545           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4546           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4547           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4548                               quotient, 0, OPTAB_LIB_WIDEN);
4549           if (tem != quotient)
4550             emit_move_insn (quotient, tem);
4551           emit_jump_insn (gen_jump (label5));
4552           emit_barrier ();
4553           emit_label (label1);
4554           expand_inc (adjusted_op0, const1_rtx);
4555           emit_jump_insn (gen_jump (label4));
4556           emit_barrier ();
4557           emit_label (label2);
4558           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4559           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4560                               quotient, 0, OPTAB_LIB_WIDEN);
4561           if (tem != quotient)
4562             emit_move_insn (quotient, tem);
4563           emit_jump_insn (gen_jump (label5));
4564           emit_barrier ();
4565           emit_label (label3);
4566           expand_dec (adjusted_op0, const1_rtx);
4567           emit_label (label4);
4568           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4569                               quotient, 0, OPTAB_LIB_WIDEN);
4570           if (tem != quotient)
4571             emit_move_insn (quotient, tem);
4572           expand_dec (quotient, const1_rtx);
4573           emit_label (label5);
4574         }
4575         break;
4576
4577       case CEIL_DIV_EXPR:
4578       case CEIL_MOD_EXPR:
4579         if (unsignedp)
4580           {
4581             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4582               {
4583                 rtx t1, t2, t3;
4584                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4585                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4586                                    floor_log2 (d), tquotient, 1);
4587                 t2 = expand_binop (compute_mode, and_optab, op0,
4588                                    gen_int_mode (d - 1, compute_mode),
4589                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4590                 t3 = gen_reg_rtx (compute_mode);
4591                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4592                                       compute_mode, 1, 1);
4593                 if (t3 == 0)
4594                   {
4595                     rtx_code_label *lab;
4596                     lab = gen_label_rtx ();
4597                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4598                     expand_inc (t1, const1_rtx);
4599                     emit_label (lab);
4600                     quotient = t1;
4601                   }
4602                 else
4603                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4604                                                           t1, t3),
4605                                             tquotient);
4606                 break;
4607               }
4608
4609             /* Try using an instruction that produces both the quotient and
4610                remainder, using truncation.  We can easily compensate the
4611                quotient or remainder to get ceiling rounding, once we have the
4612                remainder.  Notice that we compute also the final remainder
4613                value here, and return the result right away.  */
4614             if (target == 0 || GET_MODE (target) != compute_mode)
4615               target = gen_reg_rtx (compute_mode);
4616
4617             if (rem_flag)
4618               {
4619                 remainder = (REG_P (target)
4620                              ? target : gen_reg_rtx (compute_mode));
4621                 quotient = gen_reg_rtx (compute_mode);
4622               }
4623             else
4624               {
4625                 quotient = (REG_P (target)
4626                             ? target : gen_reg_rtx (compute_mode));
4627                 remainder = gen_reg_rtx (compute_mode);
4628               }
4629
4630             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4631                                      remainder, 1))
4632               {
4633                 /* This could be computed with a branch-less sequence.
4634                    Save that for later.  */
4635                 rtx_code_label *label = gen_label_rtx ();
4636                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4637                                  compute_mode, label);
4638                 expand_inc (quotient, const1_rtx);
4639                 expand_dec (remainder, op1);
4640                 emit_label (label);
4641                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4642               }
4643
4644             /* No luck with division elimination or divmod.  Have to do it
4645                by conditionally adjusting op0 *and* the result.  */
4646             {
4647               rtx_code_label *label1, *label2;
4648               rtx adjusted_op0, tem;
4649
4650               quotient = gen_reg_rtx (compute_mode);
4651               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4652               label1 = gen_label_rtx ();
4653               label2 = gen_label_rtx ();
4654               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4655                                compute_mode, label1);
4656               emit_move_insn  (quotient, const0_rtx);
4657               emit_jump_insn (gen_jump (label2));
4658               emit_barrier ();
4659               emit_label (label1);
4660               expand_dec (adjusted_op0, const1_rtx);
4661               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4662                                   quotient, 1, OPTAB_LIB_WIDEN);
4663               if (tem != quotient)
4664                 emit_move_insn (quotient, tem);
4665               expand_inc (quotient, const1_rtx);
4666               emit_label (label2);
4667             }
4668           }
4669         else /* signed */
4670           {
4671             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4672                 && INTVAL (op1) >= 0)
4673               {
4674                 /* This is extremely similar to the code for the unsigned case
4675                    above.  For 2.7 we should merge these variants, but for
4676                    2.6.1 I don't want to touch the code for unsigned since that
4677                    get used in C.  The signed case will only be used by other
4678                    languages (Ada).  */
4679
4680                 rtx t1, t2, t3;
4681                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4682                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4683                                    floor_log2 (d), tquotient, 0);
4684                 t2 = expand_binop (compute_mode, and_optab, op0,
4685                                    gen_int_mode (d - 1, compute_mode),
4686                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4687                 t3 = gen_reg_rtx (compute_mode);
4688                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4689                                       compute_mode, 1, 1);
4690                 if (t3 == 0)
4691                   {
4692                     rtx_code_label *lab;
4693                     lab = gen_label_rtx ();
4694                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4695                     expand_inc (t1, const1_rtx);
4696                     emit_label (lab);
4697                     quotient = t1;
4698                   }
4699                 else
4700                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4701                                                           t1, t3),
4702                                             tquotient);
4703                 break;
4704               }
4705
4706             /* Try using an instruction that produces both the quotient and
4707                remainder, using truncation.  We can easily compensate the
4708                quotient or remainder to get ceiling rounding, once we have the
4709                remainder.  Notice that we compute also the final remainder
4710                value here, and return the result right away.  */
4711             if (target == 0 || GET_MODE (target) != compute_mode)
4712               target = gen_reg_rtx (compute_mode);
4713             if (rem_flag)
4714               {
4715                 remainder= (REG_P (target)
4716                             ? target : gen_reg_rtx (compute_mode));
4717                 quotient = gen_reg_rtx (compute_mode);
4718               }
4719             else
4720               {
4721                 quotient = (REG_P (target)
4722                             ? target : gen_reg_rtx (compute_mode));
4723                 remainder = gen_reg_rtx (compute_mode);
4724               }
4725
4726             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4727                                      remainder, 0))
4728               {
4729                 /* This could be computed with a branch-less sequence.
4730                    Save that for later.  */
4731                 rtx tem;
4732                 rtx_code_label *label = gen_label_rtx ();
4733                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4734                                  compute_mode, label);
4735                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4736                                     NULL_RTX, 0, OPTAB_WIDEN);
4737                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4738                 expand_inc (quotient, const1_rtx);
4739                 expand_dec (remainder, op1);
4740                 emit_label (label);
4741                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4742               }
4743
4744             /* No luck with division elimination or divmod.  Have to do it
4745                by conditionally adjusting op0 *and* the result.  */
4746             {
4747               rtx_code_label *label1, *label2, *label3, *label4, *label5;
4748               rtx adjusted_op0;
4749               rtx tem;
4750
4751               quotient = gen_reg_rtx (compute_mode);
4752               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4753               label1 = gen_label_rtx ();
4754               label2 = gen_label_rtx ();
4755               label3 = gen_label_rtx ();
4756               label4 = gen_label_rtx ();
4757               label5 = gen_label_rtx ();
4758               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4759               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4760                                compute_mode, label1);
4761               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4762                                   quotient, 0, OPTAB_LIB_WIDEN);
4763               if (tem != quotient)
4764                 emit_move_insn (quotient, tem);
4765               emit_jump_insn (gen_jump (label5));
4766               emit_barrier ();
4767               emit_label (label1);
4768               expand_dec (adjusted_op0, const1_rtx);
4769               emit_jump_insn (gen_jump (label4));
4770               emit_barrier ();
4771               emit_label (label2);
4772               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4773                                compute_mode, label3);
4774               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4775                                   quotient, 0, OPTAB_LIB_WIDEN);
4776               if (tem != quotient)
4777                 emit_move_insn (quotient, tem);
4778               emit_jump_insn (gen_jump (label5));
4779               emit_barrier ();
4780               emit_label (label3);
4781               expand_inc (adjusted_op0, const1_rtx);
4782               emit_label (label4);
4783               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4784                                   quotient, 0, OPTAB_LIB_WIDEN);
4785               if (tem != quotient)
4786                 emit_move_insn (quotient, tem);
4787               expand_inc (quotient, const1_rtx);
4788               emit_label (label5);
4789             }
4790           }
4791         break;
4792
4793       case EXACT_DIV_EXPR:
4794         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4795           {
4796             HOST_WIDE_INT d = INTVAL (op1);
4797             unsigned HOST_WIDE_INT ml;
4798             int pre_shift;
4799             rtx t1;
4800
4801             pre_shift = floor_log2 (d & -d);
4802             ml = invert_mod2n (d >> pre_shift, size);
4803             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4804                                pre_shift, NULL_RTX, unsignedp);
4805             quotient = expand_mult (compute_mode, t1,
4806                                     gen_int_mode (ml, compute_mode),
4807                                     NULL_RTX, 1);
4808
4809             insn = get_last_insn ();
4810             set_dst_reg_note (insn, REG_EQUAL,
4811                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4812                                               compute_mode, op0, op1),
4813                               quotient);
4814           }
4815         break;
4816
4817       case ROUND_DIV_EXPR:
4818       case ROUND_MOD_EXPR:
4819         if (unsignedp)
4820           {
4821             rtx tem;
4822             rtx_code_label *label;
4823             label = gen_label_rtx ();
4824             quotient = gen_reg_rtx (compute_mode);
4825             remainder = gen_reg_rtx (compute_mode);
4826             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4827               {
4828                 rtx tem;
4829                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4830                                          quotient, 1, OPTAB_LIB_WIDEN);
4831                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4832                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4833                                           remainder, 1, OPTAB_LIB_WIDEN);
4834               }
4835             tem = plus_constant (compute_mode, op1, -1);
4836             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4837             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4838             expand_inc (quotient, const1_rtx);
4839             expand_dec (remainder, op1);
4840             emit_label (label);
4841           }
4842         else
4843           {
4844             rtx abs_rem, abs_op1, tem, mask;
4845             rtx_code_label *label;
4846             label = gen_label_rtx ();
4847             quotient = gen_reg_rtx (compute_mode);
4848             remainder = gen_reg_rtx (compute_mode);
4849             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4850               {
4851                 rtx tem;
4852                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4853                                          quotient, 0, OPTAB_LIB_WIDEN);
4854                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4855                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4856                                           remainder, 0, OPTAB_LIB_WIDEN);
4857               }
4858             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4859             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4860             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4861                                 1, NULL_RTX, 1);
4862             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4863             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4864                                 NULL_RTX, 0, OPTAB_WIDEN);
4865             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4866                                  size - 1, NULL_RTX, 0);
4867             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4868                                 NULL_RTX, 0, OPTAB_WIDEN);
4869             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4870                                 NULL_RTX, 0, OPTAB_WIDEN);
4871             expand_inc (quotient, tem);
4872             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4873                                 NULL_RTX, 0, OPTAB_WIDEN);
4874             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4875                                 NULL_RTX, 0, OPTAB_WIDEN);
4876             expand_dec (remainder, tem);
4877             emit_label (label);
4878           }
4879         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4880
4881       default:
4882         gcc_unreachable ();
4883       }
4884
4885   if (quotient == 0)
4886     {
4887       if (target && GET_MODE (target) != compute_mode)
4888         target = 0;
4889
4890       if (rem_flag)
4891         {
4892           /* Try to produce the remainder without producing the quotient.
4893              If we seem to have a divmod pattern that does not require widening,
4894              don't try widening here.  We should really have a WIDEN argument
4895              to expand_twoval_binop, since what we'd really like to do here is
4896              1) try a mod insn in compute_mode
4897              2) try a divmod insn in compute_mode
4898              3) try a div insn in compute_mode and multiply-subtract to get
4899                 remainder
4900              4) try the same things with widening allowed.  */
4901           remainder
4902             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4903                                  op0, op1, target,
4904                                  unsignedp,
4905                                  ((optab_handler (optab2, compute_mode)
4906                                    != CODE_FOR_nothing)
4907                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4908           if (remainder == 0)
4909             {
4910               /* No luck there.  Can we do remainder and divide at once
4911                  without a library call?  */
4912               remainder = gen_reg_rtx (compute_mode);
4913               if (! expand_twoval_binop ((unsignedp
4914                                           ? udivmod_optab
4915                                           : sdivmod_optab),
4916                                          op0, op1,
4917                                          NULL_RTX, remainder, unsignedp))
4918                 remainder = 0;
4919             }
4920
4921           if (remainder)
4922             return gen_lowpart (mode, remainder);
4923         }
4924
4925       /* Produce the quotient.  Try a quotient insn, but not a library call.
4926          If we have a divmod in this mode, use it in preference to widening
4927          the div (for this test we assume it will not fail). Note that optab2
4928          is set to the one of the two optabs that the call below will use.  */
4929       quotient
4930         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4931                              op0, op1, rem_flag ? NULL_RTX : target,
4932                              unsignedp,
4933                              ((optab_handler (optab2, compute_mode)
4934                                != CODE_FOR_nothing)
4935                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4936
4937       if (quotient == 0)
4938         {
4939           /* No luck there.  Try a quotient-and-remainder insn,
4940              keeping the quotient alone.  */
4941           quotient = gen_reg_rtx (compute_mode);
4942           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4943                                      op0, op1,
4944                                      quotient, NULL_RTX, unsignedp))
4945             {
4946               quotient = 0;
4947               if (! rem_flag)
4948                 /* Still no luck.  If we are not computing the remainder,
4949                    use a library call for the quotient.  */
4950                 quotient = sign_expand_binop (compute_mode,
4951                                               udiv_optab, sdiv_optab,
4952                                               op0, op1, target,
4953                                               unsignedp, OPTAB_LIB_WIDEN);
4954             }
4955         }
4956     }
4957
4958   if (rem_flag)
4959     {
4960       if (target && GET_MODE (target) != compute_mode)
4961         target = 0;
4962
4963       if (quotient == 0)
4964         {
4965           /* No divide instruction either.  Use library for remainder.  */
4966           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4967                                          op0, op1, target,
4968                                          unsignedp, OPTAB_LIB_WIDEN);
4969           /* No remainder function.  Try a quotient-and-remainder
4970              function, keeping the remainder.  */
4971           if (!remainder)
4972             {
4973               remainder = gen_reg_rtx (compute_mode);
4974               if (!expand_twoval_binop_libfunc
4975                   (unsignedp ? udivmod_optab : sdivmod_optab,
4976                    op0, op1,
4977                    NULL_RTX, remainder,
4978                    unsignedp ? UMOD : MOD))
4979                 remainder = NULL_RTX;
4980             }
4981         }
4982       else
4983         {
4984           /* We divided.  Now finish doing X - Y * (X / Y).  */
4985           remainder = expand_mult (compute_mode, quotient, op1,
4986                                    NULL_RTX, unsignedp);
4987           remainder = expand_binop (compute_mode, sub_optab, op0,
4988                                     remainder, target, unsignedp,
4989                                     OPTAB_LIB_WIDEN);
4990         }
4991     }
4992
4993   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4994 }
4995 \f
4996 /* Return a tree node with data type TYPE, describing the value of X.
4997    Usually this is an VAR_DECL, if there is no obvious better choice.
4998    X may be an expression, however we only support those expressions
4999    generated by loop.c.  */
5000
5001 tree
5002 make_tree (tree type, rtx x)
5003 {
5004   tree t;
5005
5006   switch (GET_CODE (x))
5007     {
5008     case CONST_INT:
5009     case CONST_WIDE_INT:
5010       t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
5011       return t;
5012
5013     case CONST_DOUBLE:
5014       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5015       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5016         t = wide_int_to_tree (type,
5017                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5018                                                     HOST_BITS_PER_WIDE_INT * 2));
5019       else
5020         {
5021           REAL_VALUE_TYPE d;
5022
5023           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5024           t = build_real (type, d);
5025         }
5026
5027       return t;
5028
5029     case CONST_VECTOR:
5030       {
5031         int units = CONST_VECTOR_NUNITS (x);
5032         tree itype = TREE_TYPE (type);
5033         tree *elts;
5034         int i;
5035
5036         /* Build a tree with vector elements.  */
5037         elts = XALLOCAVEC (tree, units);
5038         for (i = units - 1; i >= 0; --i)
5039           {
5040             rtx elt = CONST_VECTOR_ELT (x, i);
5041             elts[i] = make_tree (itype, elt);
5042           }
5043
5044         return build_vector (type, elts);
5045       }
5046
5047     case PLUS:
5048       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5049                           make_tree (type, XEXP (x, 1)));
5050
5051     case MINUS:
5052       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5053                           make_tree (type, XEXP (x, 1)));
5054
5055     case NEG:
5056       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5057
5058     case MULT:
5059       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5060                           make_tree (type, XEXP (x, 1)));
5061
5062     case ASHIFT:
5063       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5064                           make_tree (type, XEXP (x, 1)));
5065
5066     case LSHIFTRT:
5067       t = unsigned_type_for (type);
5068       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5069                                          make_tree (t, XEXP (x, 0)),
5070                                          make_tree (type, XEXP (x, 1))));
5071
5072     case ASHIFTRT:
5073       t = signed_type_for (type);
5074       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5075                                          make_tree (t, XEXP (x, 0)),
5076                                          make_tree (type, XEXP (x, 1))));
5077
5078     case DIV:
5079       if (TREE_CODE (type) != REAL_TYPE)
5080         t = signed_type_for (type);
5081       else
5082         t = type;
5083
5084       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5085                                          make_tree (t, XEXP (x, 0)),
5086                                          make_tree (t, XEXP (x, 1))));
5087     case UDIV:
5088       t = unsigned_type_for (type);
5089       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5090                                          make_tree (t, XEXP (x, 0)),
5091                                          make_tree (t, XEXP (x, 1))));
5092
5093     case SIGN_EXTEND:
5094     case ZERO_EXTEND:
5095       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5096                                           GET_CODE (x) == ZERO_EXTEND);
5097       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5098
5099     case CONST:
5100       return make_tree (type, XEXP (x, 0));
5101
5102     case SYMBOL_REF:
5103       t = SYMBOL_REF_DECL (x);
5104       if (t)
5105         return fold_convert (type, build_fold_addr_expr (t));
5106       /* else fall through.  */
5107
5108     default:
5109       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5110
5111       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5112          address mode to pointer mode.  */
5113       if (POINTER_TYPE_P (type))
5114         x = convert_memory_address_addr_space
5115               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5116
5117       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5118          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5119       t->decl_with_rtl.rtl = x;
5120
5121       return t;
5122     }
5123 }
5124 \f
5125 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5126    and returning TARGET.
5127
5128    If TARGET is 0, a pseudo-register or constant is returned.  */
5129
5130 rtx
5131 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5132 {
5133   rtx tem = 0;
5134
5135   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5136     tem = simplify_binary_operation (AND, mode, op0, op1);
5137   if (tem == 0)
5138     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5139
5140   if (target == 0)
5141     target = tem;
5142   else if (tem != target)
5143     emit_move_insn (target, tem);
5144   return target;
5145 }
5146
5147 /* Helper function for emit_store_flag.  */
5148 rtx
5149 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5150              machine_mode mode, machine_mode compare_mode,
5151              int unsignedp, rtx x, rtx y, int normalizep,
5152              machine_mode target_mode)
5153 {
5154   struct expand_operand ops[4];
5155   rtx op0, comparison, subtarget;
5156   rtx_insn *last;
5157   machine_mode result_mode = targetm.cstore_mode (icode);
5158
5159   last = get_last_insn ();
5160   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5161   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5162   if (!x || !y)
5163     {
5164       delete_insns_since (last);
5165       return NULL_RTX;
5166     }
5167
5168   if (target_mode == VOIDmode)
5169     target_mode = result_mode;
5170   if (!target)
5171     target = gen_reg_rtx (target_mode);
5172
5173   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5174
5175   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5176   create_fixed_operand (&ops[1], comparison);
5177   create_fixed_operand (&ops[2], x);
5178   create_fixed_operand (&ops[3], y);
5179   if (!maybe_expand_insn (icode, 4, ops))
5180     {
5181       delete_insns_since (last);
5182       return NULL_RTX;
5183     }
5184   subtarget = ops[0].value;
5185
5186   /* If we are converting to a wider mode, first convert to
5187      TARGET_MODE, then normalize.  This produces better combining
5188      opportunities on machines that have a SIGN_EXTRACT when we are
5189      testing a single bit.  This mostly benefits the 68k.
5190
5191      If STORE_FLAG_VALUE does not have the sign bit set when
5192      interpreted in MODE, we can do this conversion as unsigned, which
5193      is usually more efficient.  */
5194   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5195     {
5196       convert_move (target, subtarget,
5197                     val_signbit_known_clear_p (result_mode,
5198                                                STORE_FLAG_VALUE));
5199       op0 = target;
5200       result_mode = target_mode;
5201     }
5202   else
5203     op0 = subtarget;
5204
5205   /* If we want to keep subexpressions around, don't reuse our last
5206      target.  */
5207   if (optimize)
5208     subtarget = 0;
5209
5210   /* Now normalize to the proper value in MODE.  Sometimes we don't
5211      have to do anything.  */
5212   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5213     ;
5214   /* STORE_FLAG_VALUE might be the most negative number, so write
5215      the comparison this way to avoid a compiler-time warning.  */
5216   else if (- normalizep == STORE_FLAG_VALUE)
5217     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5218
5219   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5220      it hard to use a value of just the sign bit due to ANSI integer
5221      constant typing rules.  */
5222   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5223     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5224                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5225                         normalizep == 1);
5226   else
5227     {
5228       gcc_assert (STORE_FLAG_VALUE & 1);
5229
5230       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5231       if (normalizep == -1)
5232         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5233     }
5234
5235   /* If we were converting to a smaller mode, do the conversion now.  */
5236   if (target_mode != result_mode)
5237     {
5238       convert_move (target, op0, 0);
5239       return target;
5240     }
5241   else
5242     return op0;
5243 }
5244
5245
5246 /* A subroutine of emit_store_flag only including "tricks" that do not
5247    need a recursive call.  These are kept separate to avoid infinite
5248    loops.  (The double-word case below does call emit_store_flag, but
        only on word_mode operands.)

        TARGET, if nonnull, is a suggested place for the result, and
        TARGET_MODE is the mode the result should have (VOIDmode if the
        caller has no preference).  CODE is the comparison to apply to OP0
        and OP1.  MODE is the mode of the operands; if it is VOIDmode it is
        taken from OP0 once the operands are in canonical order.  If
        UNSIGNEDP is nonzero, CODE is first replaced by its unsigned
        counterpart.  NORMALIZEP has the same meaning as for
        emit_store_flag.

        The following are tried in order:
          - comparisons against 1 or -1 are rewritten as comparisons
            against zero;
          - a double-word integer compared for (in)equality with 0 or -1,
            or tested for its sign, is reduced to a word_mode comparison;
          - A < 0 and A >= 0 are computed by shifting the sign bit;
          - a cstore pattern is used, in MODE or in the first wider mode
            that provides one.

        Return an rtx holding the result, or 0 if none of the above
        produced one.  */
5249
5250 static rtx
5251 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5252                    machine_mode mode, int unsignedp, int normalizep,
5253                    machine_mode target_mode)
5254 {
5255   rtx subtarget;
5256   enum insn_code icode;
5257   machine_mode compare_mode;
5258   enum mode_class mclass;
5260   rtx tem;
5261
5262   if (unsignedp)
5263     code = unsigned_condition (code);
5265
5266   /* If one operand is constant, make it the second one.  Only do this
5267      if the other operand is not constant as well.  */
5268
5269   if (swap_commutative_operands_p (op0, op1))
5270     {
5271       tem = op0;
5272       op0 = op1;
5273       op1 = tem;
5274       code = swap_condition (code);
5275     }
5276
5277   if (mode == VOIDmode)
5278     mode = GET_MODE (op0);
5279
5280   /* For some comparisons with 1 and -1, we can convert this to
5281      comparisons with zero.  This will often produce more opportunities for
5282      store-flag insns.  */
5283
5284   switch (code)
5285     {
5286     case LT:
5287       if (op1 == const1_rtx)
5288         op1 = const0_rtx, code = LE;
5289       break;
5290     case LE:
5291       if (op1 == constm1_rtx)
5292         op1 = const0_rtx, code = LT;
5293       break;
5294     case GE:
5295       if (op1 == const1_rtx)
5296         op1 = const0_rtx, code = GT;
5297       break;
5298     case GT:
5299       if (op1 == constm1_rtx)
5300         op1 = const0_rtx, code = GE;
5301       break;
5302     case GEU:
5303       if (op1 == const1_rtx)
5304         op1 = const0_rtx, code = NE;
5305       break;
5306     case LTU:
5307       if (op1 == const1_rtx)
5308         op1 = const0_rtx, code = EQ;
5309       break;
5310     default:
5311       break;
5312     }
5313
5314   /* If we are comparing a double-word integer with zero or -1, we can
5315      convert the comparison into one involving a single word.  */
5316   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5317       && GET_MODE_CLASS (mode) == MODE_INT
5318       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5319     {
5320       if ((code == EQ || code == NE)
5321           && (op1 == const0_rtx || op1 == constm1_rtx))
5322         {
5323           rtx op00, op01;
5324
5325           /* Do a logical OR or AND of the two words and compare the
5326              result.  */
5327           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5328           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5329           tem = expand_binop (word_mode,
5330                               op1 == const0_rtx ? ior_optab : and_optab,
5331                               op00, op01, NULL_RTX, unsignedp,
5332                               OPTAB_DIRECT);
5333
5334           if (tem != 0)
5335             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5336                                    unsignedp, normalizep);
5337         }
5338       else if ((code == LT || code == GE) && op1 == const0_rtx)
5339         {
5340           rtx op0h;
5341
5342           /* If testing the sign bit, can just test on high word.  */
5343           op0h = simplify_gen_subreg (word_mode, op0, mode,
5344                                       subreg_highpart_offset (word_mode,
5345                                                               mode));
5346           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5347                                  unsignedp, normalizep);
5348         }
5349       else
5350         tem = NULL_RTX;
5351
5352       if (tem)
5353         {
               /* The word_mode result is usable as is unless the caller
                  asked for a different mode.  */
5354           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5355             return tem;
5356           if (!target)
5357             target = gen_reg_rtx (target_mode);
5358
5359           convert_move (target, tem,
5360                         !val_signbit_known_set_p (word_mode,
5361                                                   (normalizep ? normalizep
5362                                                    : STORE_FLAG_VALUE)));
5363           return target;
5364         }
5365     }
5366
5367   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5368      complement of A (for GE) and shifting the sign bit to the low bit.  */
5369   if (op1 == const0_rtx && (code == LT || code == GE)
5370       && GET_MODE_CLASS (mode) == MODE_INT
5371       && (normalizep || STORE_FLAG_VALUE == 1
5372           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5373     {
5374       subtarget = target;
5375
5376       if (!target)
5377         target_mode = mode;
5378
5379       /* If the result is to be wider than OP0, it is best to convert it
5380          first.  If it is to be narrower, it is *incorrect* to convert it
5381          first.  */
5382       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5383         {
5384           op0 = convert_modes (target_mode, mode, op0, 0);
5385           mode = target_mode;
5386         }
5387
5388       if (target_mode != mode)
5389         subtarget = 0;
5390
5391       if (code == GE)
5392         op0 = expand_unop (mode, one_cmpl_optab, op0,
5393                            ((STORE_FLAG_VALUE == 1 || normalizep)
5394                             ? 0 : subtarget), 0);
5395
5396       if (STORE_FLAG_VALUE == 1 || normalizep)
5397         /* If we are supposed to produce a 0/1 value, we want to do
5398            a logical shift from the sign bit to the low-order bit; for
5399            a -1/0 value, we do an arithmetic shift.  */
5400         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5401                             GET_MODE_BITSIZE (mode) - 1,
5402                             subtarget, normalizep != -1);
5403
5404       if (mode != target_mode)
5405         op0 = convert_modes (target_mode, mode, op0, 0);
5406
5407       return op0;
5408     }
5409
       /* Look for a cstore pattern, starting at MODE and widening.  Only the
          first mode that has a pattern is tried: the loop is left after that
          attempt whether or not it succeeded.  */
5410   mclass = GET_MODE_CLASS (mode);
5411   for (compare_mode = mode; compare_mode != VOIDmode;
5412        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5413     {
5414      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5415      icode = optab_handler (cstore_optab, optab_mode);
5416      if (icode != CODE_FOR_nothing)
5417         {
5418           do_pending_stack_adjust ();
5419           tem = emit_cstore (target, icode, code, mode, compare_mode,
5420                              unsignedp, op0, op1, normalizep, target_mode);
5421           if (tem)
5422             return tem;
5423
5424           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5425             {
                   /* Retry with the operands exchanged.  The swapped code
                      has to be derived from CODE as it stands now: CODE was
                      itself swapped above if the operands were put into
                      canonical order, and a value computed on entry would
                      then equal CODE and describe the wrong comparison.  */
                   enum rtx_code scode = swap_condition (code);

5426               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5427                                  unsignedp, op1, op0, normalizep, target_mode);
5428               if (tem)
5429                 return tem;
5430             }
5431           break;
5432         }
5433     }
5434
5435   return 0;
5436 }
5437
5438 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5439    and storing in TARGET.  Normally return TARGET.
5440    Return 0 if that cannot be done.
5441
5442    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5443    it is VOIDmode, they cannot both be CONST_INT.
5444
5445    UNSIGNEDP is for the case where we have to widen the operands
5446    to perform the operation.  It says to use zero-extension.
5447
5448    NORMALIZEP is 1 if we should convert the result to be either zero
5449    or one.  Normalize is -1 if we should convert the result to be
5450    either zero or -1.  If NORMALIZEP is zero, the result will be left
5451    "raw" out of the scc insn.

        TARGET may be null.  In that case the result is returned in
        whatever rtx and mode the successful strategy produced.

        Nothing is attempted when both operands are constant.  Otherwise
        the direct sequences of emit_store_flag_1 are tried first; if they
        fail and BRANCH_COST is nonzero, the comparison is rewritten
        (reversed, split, or reduced to a sign-bit computation) and tried
        again.  */
5452
5453 rtx
5454 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5455                  machine_mode mode, int unsignedp, int normalizep)
5456 {
       /* VOIDmode here means the caller expressed no preference for the
          mode of the result.  */
5457   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5458   enum rtx_code rcode;
5459   rtx subtarget;
5460   rtx tem, trueval;
5461   rtx_insn *last;
5462
5463   /* If we compare constants, we shouldn't use a store-flag operation,
5464      but a constant load.  We can get there via the vanilla route that
5465      usually generates a compare-branch sequence, but will in this case
5466      fold the comparison to a constant, and thus elide the branch.  */
5467   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5468     return NULL_RTX;
5469
5470   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5471                            target_mode);
5472   if (tem)
5473     return tem;
5474
5475   /* If we reached here, we can't do this with a scc insn, however there
5476      are some comparisons that can be done in other ways.  Don't do any
5477      of these cases if branches are very cheap.  */
5478   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5479     return 0;
5480
5481   /* See what we need to return.  We can only return a 1, -1, or the
5482      sign bit.  */
5483
5484   if (normalizep == 0)
5485     {
5486       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5487         normalizep = STORE_FLAG_VALUE;
5488
           /* STORE_FLAG_VALUE is exactly the sign bit of MODE: leave
              NORMALIZEP at zero so that the sign-bit results computed
              below are returned without the final shift.  */
5489       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5490         ;
5491       else
5492         return 0;
5493     }
5494
       /* Remember the current end of the insn stream so that an abandoned
          attempt can be removed with delete_insns_since.  */
5495   last = get_last_insn ();
5496
5497   /* If optimizing, use different pseudo registers for each insn, instead
5498      of reusing the same pseudo.  This leads to better CSE, but slows
5499      down the compiler, since there are more pseudos.  */
5500   subtarget = (!optimize
5501                && (target_mode == mode)) ? target : NULL_RTX;
       /* The value the result takes when the comparison holds.  */
5502   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5503
5504   /* For floating-point comparisons, try the reverse comparison or try
5505      changing the "orderedness" of the comparison.  */
5506   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5507     {
5508       enum rtx_code first_code;
5509       bool and_them;
5510
5511       rcode = reverse_condition_maybe_unordered (code);
5512       if (can_compare_p (rcode, mode, ccp_store_flag)
5513           && (code == ORDERED || code == UNORDERED
5514               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5515               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5516         {
               /* WANT_ADD is set when the raw flag value and the requested
                  value are 1 and -1 in either order.  The reversed
                  comparison then yields 0 or STORE_FLAG_VALUE, and adding
                  NORMALIZEP turns that into NORMALIZEP or 0.  Otherwise the
                  reversed comparison yields 0 or TRUEVAL, and XOR with
                  TRUEVAL exchanges the two.  */
5517           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5518                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5519
5520           /* For the reverse comparison, use either an addition or a XOR.  */
5521           if (want_add
5522               && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5523                            optimize_insn_for_speed_p ()) == 0)
5524             {
5525               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5526                                        STORE_FLAG_VALUE, target_mode);
5527               if (tem)
5528                 return expand_binop (target_mode, add_optab, tem,
5529                                      gen_int_mode (normalizep, target_mode),
5530                                      target, 0, OPTAB_WIDEN);
5531             }
5532           else if (!want_add
5533                    && rtx_cost (trueval, XOR, 1,
5534                                 optimize_insn_for_speed_p ()) == 0)
5535             {
5536               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5537                                        normalizep, target_mode);
5538               if (tem)
5539                 return expand_binop (target_mode, xor_optab, tem, trueval,
5540                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5541             }
5542         }
5543
5544       delete_insns_since (last);
5545
5546       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5547       if (code == ORDERED || code == UNORDERED)
5548         return 0;
5549
           /* From here on CODE is the second half of the split comparison
              and FIRST_CODE the ORDERED/UNORDERED half.  */
5550       and_them = split_comparison (code, mode, &first_code, &code);
5551
5552       /* If there are no NaNs, the first comparison should always fall through.
5553          Effectively change the comparison to the other one.  */
5554       if (!HONOR_NANS (mode))
5555         {
5556           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5557           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5558                                     target_mode);
5559         }
5560
5561 #ifdef HAVE_conditional_move
5562       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5563          conditional move.  */
5564       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5565                                normalizep, target_mode);
5566       if (tem == 0)
5567         return 0;
5568
           /* NOTE(review): this assumes emit_conditional_move selects the
              first of its two value operands when OP0 CODE OP1 holds, i.e.
              the result is "CODE ? TEM : 0" when AND_THEM and
              "CODE ? TRUEVAL : TEM" otherwise -- confirm against its
              definition.  */
5569       if (and_them)
5570         tem = emit_conditional_move (target, code, op0, op1, mode,
5571                                      tem, const0_rtx, GET_MODE (tem), 0);
5572       else
5573         tem = emit_conditional_move (target, code, op0, op1, mode,
5574                                      trueval, tem, GET_MODE (tem), 0);
5575
5576       if (tem == 0)
5577         delete_insns_since (last);
5578       return tem;
5579 #else
5580       return 0;
5581 #endif
5582     }
5583
5584   /* The remaining tricks only apply to integer comparisons.  */
5585
5586   if (GET_MODE_CLASS (mode) != MODE_INT)
5587     return 0;
5588
5589   /* If this is an equality comparison of integers, we can try to exclusive-or
5590      (or subtract) the two operands and use a recursive call to try the
5591      comparison with zero.  Don't do any of these cases if branches are
5592      very cheap.  */
5593
5594   if ((code == EQ || code == NE) && op1 != const0_rtx)
5595     {
5596       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5597                           OPTAB_WIDEN);
5598
5599       if (tem == 0)
5600         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5601                             OPTAB_WIDEN);
5602       if (tem != 0)
5603         tem = emit_store_flag (target, code, tem, const0_rtx,
5604                                mode, unsignedp, normalizep);
5605       if (tem != 0)
5606         return tem;
5607
5608       delete_insns_since (last);
5609     }
5610
5611   /* For integer comparisons, try the reverse comparison.  However, for
5612      small X and if we'd have anyway to extend, implementing "X != 0"
5613      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5614   rcode = reverse_condition (code);
5615   if (can_compare_p (rcode, mode, ccp_store_flag)
5616       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5617             && code == NE
5618             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5619             && op1 == const0_rtx))
5620     {
5621       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5622                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5623
5624       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5625       if (want_add
5626           && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5627                        optimize_insn_for_speed_p ()) == 0)
5628         {
5629           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5630                                    STORE_FLAG_VALUE, target_mode);
5631           if (tem != 0)
5632             tem = expand_binop (target_mode, add_optab, tem,
5633                                 gen_int_mode (normalizep, target_mode),
5634                                 target, 0, OPTAB_WIDEN);
5635         }
5636       else if (!want_add
5637                && rtx_cost (trueval, XOR, 1,
5638                             optimize_insn_for_speed_p ()) == 0)
5639         {
5640           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5641                                    normalizep, target_mode);
5642           if (tem != 0)
5643             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5644                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5645         }
5646
           /* TEM is still zero here if neither branch above applied: every
              earlier nonzero TEM has already been returned.  */
5647       if (tem != 0)
5648         return tem;
5649       delete_insns_since (last);
5650     }
5651
5652   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5653      the constant zero.  Reject all other comparisons at this point.  Only
5654      do LE and GT if branches are expensive since they are expensive on
5655      2-operand machines.  */
5656
5657   if (op1 != const0_rtx
5658       || (code != EQ && code != NE
5659           && (BRANCH_COST (optimize_insn_for_speed_p (),
5660                            false) <= 1 || (code != LE && code != GT))))
5661     return 0;
5662
5663   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5664      do the necessary operation below.  */
5665
5666   tem = 0;
5667
5668   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5669      the sign bit set.  */
5670
5671   if (code == LE)
5672     {
5673       /* This is destructive, so SUBTARGET can't be OP0.  */
5674       if (rtx_equal_p (subtarget, op0))
5675         subtarget = 0;
5676
5677       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5678                           OPTAB_WIDEN);
5679       if (tem)
5680         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5681                             OPTAB_WIDEN);
5682     }
5683
5684   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5685      number of bits in the mode of OP0, minus one.  */
5686
5687   if (code == GT)
5688     {
           /* As for LE, OP0 is read again after the first operation.  */
5689       if (rtx_equal_p (subtarget, op0))
5690         subtarget = 0;
5691
5692       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5693                           GET_MODE_BITSIZE (mode) - 1,
5694                           subtarget, 0);
5695       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5696                           OPTAB_WIDEN);
5697     }
5698
5699   if (code == EQ || code == NE)
5700     {
5701       /* For EQ or NE, one way to do the comparison is to apply an operation
5702          that converts the operand into a positive number if it is nonzero
5703          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5704          for NE we negate.  This puts the result in the sign bit.  Then we
5705          normalize with a shift, if needed.
5706
5707          Two operations that can do the above actions are ABS and FFS, so try
5708          them.  If that doesn't work, and MODE is smaller than a full word,
5709          we can use zero-extension to the wider mode (an unsigned conversion)
5710          as the operation.  */
5711
5712       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5713          that is compensated by the subsequent overflow when subtracting
5714          one / negating.  */
5715
5716       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5717         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5718       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5719         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5720       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5721         {
               /* MODE becomes word_mode from here on, so TEM may end up in
                  a mode other than TARGET_MODE; the conversion at the end of
                  the function deals with that.  */
5722           tem = convert_modes (word_mode, mode, op0, 1);
5723           mode = word_mode;
5724         }
5725
5726       if (tem != 0)
5727         {
5728           if (code == EQ)
5729             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5730                                 0, OPTAB_WIDEN);
5731           else
5732             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5733         }
5734
5735       /* If we couldn't do it that way, for NE we can "or" the two's complement
5736          of the value with itself.  For EQ, we take the one's complement of
5737          that "or", which is an extra insn, so we only handle EQ if branches
5738          are expensive.  */
5739
5740       if (tem == 0
5741           && (code == NE
5742               || BRANCH_COST (optimize_insn_for_speed_p (),
5743                               false) > 1))
5744         {
5745           if (rtx_equal_p (subtarget, op0))
5746             subtarget = 0;
5747
5748           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5749           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5750                               OPTAB_WIDEN);
5751
5752           if (tem && code == EQ)
5753             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5754         }
5755     }
5756
       /* The answer is now in the sign bit of TEM.  Move it to the low bit
          with a logical shift for a 0/1 result, or replicate it with an
          arithmetic shift for a 0/-1 result.  With NORMALIZEP zero the
          sign-bit form is itself the requested value.  */
5757   if (tem && normalizep)
5758     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5759                         GET_MODE_BITSIZE (mode) - 1,
5760                         subtarget, normalizep == 1);
5761
5762   if (tem)
5763     {
           /* With no TARGET, hand back TEM as it is.  Otherwise store it in
              TARGET, converting if the modes differ.  NOTE(review): no copy
              is made when SUBTARGET is still set, presumably because the
              operations above were then directed at SUBTARGET, which is
              TARGET -- confirm that they always honor it.  */
5764       if (!target)
5765         ;
5766       else if (GET_MODE (tem) != target_mode)
5767         {
5768           convert_move (target, tem, 0);
5769           tem = target;
5770         }
5771       else if (!subtarget)
5772         {
5773           emit_move_insn (target, tem);
5774           tem = target;
5775         }
5776     }
5777   else
5778     delete_insns_since (last);
5779
5780   return tem;
5781 }
5782
5783 /* Like emit_store_flag, but always succeeds.

        The arguments are those of emit_store_flag.  If emit_store_flag
        cannot do the job, the value is built with an explicit
        move / compare-and-jump / move sequence.  A null TARGET is replaced
        by a new word_mode pseudo, and a TARGET that is not a register or
        that is mentioned in OP0 or OP1 is replaced by a new pseudo of the
        same mode, so the rtx returned is not necessarily TARGET.  */
5784
5785 rtx
5786 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5787                        machine_mode mode, int unsignedp, int normalizep)
5788 {
5789   rtx tem;
5790   rtx_code_label *label;
5791   rtx trueval, falseval;
5792
5793   /* First see if emit_store_flag can do the job.  */
5794   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5795   if (tem != 0)
5796     return tem;
5797
5798   if (!target)
5799     target = gen_reg_rtx (word_mode);
5800
5801   /* If this failed, we have to do this with set/compare/jump/set code.
5802      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
       /* With NORMALIZEP zero, 1 is used as the "true" value here.  */
5803   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5804   if (code == NE
5805       && GET_MODE_CLASS (mode) == MODE_INT
5806       && REG_P (target)
5807       && op0 == target
5808       && op1 == const0_rtx)
5809     {
           /* TARGET already holds the operand: skip the store of TRUEVAL
              when it is zero, so that it stays zero.  */
5810       label = gen_label_rtx ();
5811       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5812                                mode, NULL_RTX, NULL_RTX, label, -1);
5813       emit_move_insn (target, trueval);
5814       emit_label (label);
5815       return target;
5816     }
5817
       /* TARGET is written below before the comparison reads OP0 and OP1,
          so it must be a register that neither operand mentions.  */
5818   if (!REG_P (target)
5819       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5820     target = gen_reg_rtx (GET_MODE (target));
5821
5822   /* Jump in the right direction if the target cannot implement CODE
5823      but can jump on its reverse condition.  */
5824   falseval = const0_rtx;
5825   if (! can_compare_p (code, mode, ccp_jump)
5826       && (! FLOAT_MODE_P (mode)
5827           || code == ORDERED || code == UNORDERED
5828           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5829           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5830     {
5831       enum rtx_code rcode;
5832       if (FLOAT_MODE_P (mode))
5833         rcode = reverse_condition_maybe_unordered (code);
5834       else
5835         rcode = reverse_condition (code);
5836
5837       /* Canonicalize to UNORDERED for the libcall.  */
5838       if (can_compare_p (rcode, mode, ccp_jump)
5839           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5840         {
               /* Test the reversed condition and exchange the two values
                  stored, which gives the same result.  */
5841           falseval = trueval;
5842           trueval = const0_rtx;
5843           code = rcode;
5844         }
5845     }
5846
       /* Store TRUEVAL first; if the comparison holds, jump over the store
          of FALSEVAL, otherwise fall through and overwrite TARGET.  */
5847   emit_move_insn (target, trueval);
5848   label = gen_label_rtx ();
5849   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5850                            NULL_RTX, label, -1);
5851
5852   emit_move_insn (target, falseval);
5853   emit_label (label);
5854
5855   return target;
5856 }
5857 \f
5858 /* Perform possibly multi-word comparison and conditional jump to LABEL
5859    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5860    now a thin wrapper around do_compare_rtx_and_jump.

        The comparison is treated as unsigned exactly when OP is one of
        LTU, LEU, GTU or GEU.  */
5861
5862 static void
5863 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5864                  rtx_code_label *label)
5865 {
5866   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
       /* NOTE(review): the two NULL_RTX arguments are presumably the size
          rtx and the jump-if-false label, and the trailing -1 an "unknown"
          branch probability -- confirm against the declaration of
          do_compare_rtx_and_jump.  */
5867   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5868                            NULL_RTX, NULL_RTX, label, -1);
5869 }