gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2015 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "diagnostic-core.h"
  27 #include "rtl.h"
  28 #include "hash-set.h"
  29 #include "machmode.h"
  30 #include "vec.h"
  31 #include "double-int.h"
  32 #include "input.h"
  33 #include "alias.h"
  34 #include "symtab.h"
  35 #include "wide-int.h"
  36 #include "inchash.h"
  37 #include "tree.h"
  38 #include "fold-const.h"
  39 #include "stor-layout.h"
  40 #include "tm_p.h"
  41 #include "flags.h"
  42 #include "insn-config.h"
  43 #include "hashtab.h"
  44 #include "hard-reg-set.h"
  45 #include "function.h"
  46 #include "statistics.h"
  47 #include "real.h"
  48 #include "fixed-value.h"
  49 #include "expmed.h"
  50 #include "dojump.h"
  51 #include "explow.h"
  52 #include "calls.h"
  53 #include "emit-rtl.h"
  54 #include "varasm.h"
  55 #include "stmt.h"
  56 #include "expr.h"
  57 #include "insn-codes.h"
  58 #include "optabs.h"
  59 #include "recog.h"
  60 #include "langhooks.h"
  61 #include "predict.h"
  62 #include "basic-block.h"
  63 #include "df.h"
  64 #include "target.h"
  65
  66 struct target_expmed default_target_expmed;
  67 #if SWITCHABLE_TARGET
  68 struct target_expmed *this_target_expmed = &default_target_expmed;
  69 #endif
  70
  71 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  72                                    unsigned HOST_WIDE_INT,
  73                                    unsigned HOST_WIDE_INT,
  74                                    unsigned HOST_WIDE_INT,
  75                                    rtx);
  76 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
  77                                      unsigned HOST_WIDE_INT,
  78                                      rtx);
  79 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  80                                    unsigned HOST_WIDE_INT,
  81                                    unsigned HOST_WIDE_INT,
  82                                    unsigned HOST_WIDE_INT,
  83                                    rtx);
  84 static rtx extract_fixed_bit_field (machine_mode, rtx,
  85                                     unsigned HOST_WIDE_INT,
  86                                     unsigned HOST_WIDE_INT, rtx, int);
  87 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
  88                                       unsigned HOST_WIDE_INT,
  89                                       unsigned HOST_WIDE_INT, rtx, int);
  90 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
  91 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  92                                     unsigned HOST_WIDE_INT, int);
  93 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
  94 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  95 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  96
  97 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  98    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  99    The mask is truncated if necessary to the width of mode MODE.  The
 100    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
 101
 102 static inline rtx
 103 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
 104 {
 105   return immed_wide_int_const
 106     (wi::shifted_mask (bitpos, bitsize, complement,
 107                        GET_MODE_PRECISION (mode)), mode);
 108 }
 109
 110 /* Test whether a value is zero of a power of two.  */
 111 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
 112   (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
 113
 114 struct init_expmed_rtl
 115 {
 116   rtx reg;
 117   rtx plus;
 118   rtx neg;
 119   rtx mult;
 120   rtx sdiv;
 121   rtx udiv;
 122   rtx sdiv_32;
 123   rtx smod_32;
 124   rtx wide_mult;
 125   rtx wide_lshr;
 126   rtx wide_trunc;
 127   rtx shift;
 128   rtx shift_mult;
 129   rtx shift_add;
 130   rtx shift_sub0;
 131   rtx shift_sub1;
 132   rtx zext;
 133   rtx trunc;
 134
 135   rtx pow2[MAX_BITS_PER_WORD];
 136   rtx cint[MAX_BITS_PER_WORD];
 137 };
 138
 139 static void
 140 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
 141                       machine_mode from_mode, bool speed)
 142 {
 143   int to_size, from_size;
 144   rtx which;
 145
 146   to_size = GET_MODE_PRECISION (to_mode);
 147   from_size = GET_MODE_PRECISION (from_mode);
 148
 149   /* Most partial integers have a precision less than the "full"
 150      integer it requires for storage.  In case one doesn't, for
 151      comparison purposes here, reduce the bit size by one in that
 152      case.  */
 153   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 154       && exact_log2 (to_size) != -1)
 155     to_size --;
 156   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 157       && exact_log2 (from_size) != -1)
 158     from_size --;
 159
 160   /* Assume cost of zero-extend and sign-extend is the same.  */
 161   which = (to_size < from_size ? all->trunc : all->zext);
 162
 163   PUT_MODE (all->reg, from_mode);
 164   set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
 165 }
 166
 167 static void
 168 init_expmed_one_mode (struct init_expmed_rtl *all,
 169                       machine_mode mode, int speed)
 170 {
 171   int m, n, mode_bitsize;
 172   machine_mode mode_from;
 173
 174   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 175
 176   PUT_MODE (all->reg, mode);
 177   PUT_MODE (all->plus, mode);
 178   PUT_MODE (all->neg, mode);
 179   PUT_MODE (all->mult, mode);
 180   PUT_MODE (all->sdiv, mode);
 181   PUT_MODE (all->udiv, mode);
 182   PUT_MODE (all->sdiv_32, mode);
 183   PUT_MODE (all->smod_32, mode);
 184   PUT_MODE (all->wide_trunc, mode);
 185   PUT_MODE (all->shift, mode);
 186   PUT_MODE (all->shift_mult, mode);
 187   PUT_MODE (all->shift_add, mode);
 188   PUT_MODE (all->shift_sub0, mode);
 189   PUT_MODE (all->shift_sub1, mode);
 190   PUT_MODE (all->zext, mode);
 191   PUT_MODE (all->trunc, mode);
 192
 193   set_add_cost (speed, mode, set_src_cost (all->plus, speed));
 194   set_neg_cost (speed, mode, set_src_cost (all->neg, speed));
 195   set_mul_cost (speed, mode, set_src_cost (all->mult, speed));
 196   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, speed));
 197   set_udiv_cost (speed, mode, set_src_cost (all->udiv, speed));
 198
 199   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, speed)
 200                                      <= 2 * add_cost (speed, mode)));
 201   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, speed)
 202                                      <= 4 * add_cost (speed, mode)));
 203
 204   set_shift_cost (speed, mode, 0, 0);
 205   {
 206     int cost = add_cost (speed, mode);
 207     set_shiftadd_cost (speed, mode, 0, cost);
 208     set_shiftsub0_cost (speed, mode, 0, cost);
 209     set_shiftsub1_cost (speed, mode, 0, cost);
 210   }
 211
 212   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 213   for (m = 1; m < n; m++)
 214     {
 215       XEXP (all->shift, 1) = all->cint[m];
 216       XEXP (all->shift_mult, 1) = all->pow2[m];
 217
 218       set_shift_cost (speed, mode, m, set_src_cost (all->shift, speed));
 219       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, speed));
 220       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, speed));
 221       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, speed));
 222     }
 223
 224   if (SCALAR_INT_MODE_P (mode))
 225     {
 226       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 227            mode_from = (machine_mode)(mode_from + 1))
 228         init_expmed_one_conv (all, mode, mode_from, speed);
 229     }
 230   if (GET_MODE_CLASS (mode) == MODE_INT)
 231     {
 232       machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
 233       if (wider_mode != VOIDmode)
 234         {
 235           PUT_MODE (all->zext, wider_mode);
 236           PUT_MODE (all->wide_mult, wider_mode);
 237           PUT_MODE (all->wide_lshr, wider_mode);
 238           XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
 239
 240           set_mul_widen_cost (speed, wider_mode,
 241                               set_src_cost (all->wide_mult, speed));
 242           set_mul_highpart_cost (speed, mode,
 243                                  set_src_cost (all->wide_trunc, speed));
 244         }
 245     }
 246 }
 247
 248 void
 249 init_expmed (void)
 250 {
 251   struct init_expmed_rtl all;
 252   machine_mode mode = QImode;
 253   int m, speed;
 254
 255   memset (&all, 0, sizeof all);
 256   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 257     {
 258       all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 259       all.cint[m] = GEN_INT (m);
 260     }
 261
 262   /* Avoid using hard regs in ways which may be unsupported.  */
 263   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 264   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 265   all.neg = gen_rtx_NEG (mode, all.reg);
 266   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 267   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 268   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 269   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 270   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 271   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 272   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 273   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 274   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 275   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 276   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 277   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 278   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 279   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 280   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 281
 282   for (speed = 0; speed < 2; speed++)
 283     {
 284       crtl->maybe_hot_insn_p = speed;
 285       set_zero_cost (speed, set_src_cost (const0_rtx, speed));
 286
 287       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 288            mode = (machine_mode)(mode + 1))
 289         init_expmed_one_mode (&all, mode, speed);
 290
 291       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 292         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 293              mode = (machine_mode)(mode + 1))
 294           init_expmed_one_mode (&all, mode, speed);
 295
 296       if (MIN_MODE_VECTOR_INT != VOIDmode)
 297         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 298              mode = (machine_mode)(mode + 1))
 299           init_expmed_one_mode (&all, mode, speed);
 300     }
 301
 302   if (alg_hash_used_p ())
 303     {
 304       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 305       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 306     }
 307   else
 308     set_alg_hash_used_p (true);
 309   default_rtl_profile ();
 310
 311   ggc_free (all.trunc);
 312   ggc_free (all.shift_sub1);
 313   ggc_free (all.shift_sub0);
 314   ggc_free (all.shift_add);
 315   ggc_free (all.shift_mult);
 316   ggc_free (all.shift);
 317   ggc_free (all.wide_trunc);
 318   ggc_free (all.wide_lshr);
 319   ggc_free (all.wide_mult);
 320   ggc_free (all.zext);
 321   ggc_free (all.smod_32);
 322   ggc_free (all.sdiv_32);
 323   ggc_free (all.udiv);
 324   ggc_free (all.sdiv);
 325   ggc_free (all.mult);
 326   ggc_free (all.neg);
 327   ggc_free (all.plus);
 328   ggc_free (all.reg);
 329 }
 330
 331 /* Return an rtx representing minus the value of X.
 332    MODE is the intended mode of the result,
 333    useful if X is a CONST_INT.  */
 334
 335 rtx
 336 negate_rtx (machine_mode mode, rtx x)
 337 {
 338   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 339
 340   if (result == 0)
 341     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 342
 343   return result;
 344 }
 345
 346 /* Adjust bitfield memory MEM so that it points to the first unit of mode
 347    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
 348    If MODE is BLKmode, return a reference to every byte in the bitfield.
 349    Set *NEW_BITNUM to the bit position of the field within the new memory.  */
 350
 351 static rtx
 352 narrow_bit_field_mem (rtx mem, machine_mode mode,
 353                       unsigned HOST_WIDE_INT bitsize,
 354                       unsigned HOST_WIDE_INT bitnum,
 355                       unsigned HOST_WIDE_INT *new_bitnum)
 356 {
 357   if (mode == BLKmode)
 358     {
 359       *new_bitnum = bitnum % BITS_PER_UNIT;
 360       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 361       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 362                             / BITS_PER_UNIT);
 363       return adjust_bitfield_address_size (mem, mode, offset, size);
 364     }
 365   else
 366     {
 367       unsigned int unit = GET_MODE_BITSIZE (mode);
 368       *new_bitnum = bitnum % unit;
 369       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 370       return adjust_bitfield_address (mem, mode, offset);
 371     }
 372 }
 373
 374 /* The caller wants to perform insertion or extraction PATTERN on a
 375    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 376    BITREGION_START and BITREGION_END are as for store_bit_field
 377    and FIELDMODE is the natural mode of the field.
 378
 379    Search for a mode that is compatible with the memory access
 380    restrictions and (where applicable) with a register insertion or
 381    extraction.  Return the new memory on success, storing the adjusted
 382    bit position in *NEW_BITNUM.  Return null otherwise.  */
 383
 384 static rtx
 385 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 386                               rtx op0, HOST_WIDE_INT bitsize,
 387                               HOST_WIDE_INT bitnum,
 388                               unsigned HOST_WIDE_INT bitregion_start,
 389                               unsigned HOST_WIDE_INT bitregion_end,
 390                               machine_mode fieldmode,
 391                               unsigned HOST_WIDE_INT *new_bitnum)
 392 {
 393   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 394                                 bitregion_end, MEM_ALIGN (op0),
 395                                 MEM_VOLATILE_P (op0));
 396   machine_mode best_mode;
 397   if (iter.next_mode (&best_mode))
 398     {
 399       /* We can use a memory in BEST_MODE.  See whether this is true for
 400          any wider modes.  All other things being equal, we prefer to
 401          use the widest mode possible because it tends to expose more
 402          CSE opportunities.  */
 403       if (!iter.prefer_smaller_modes ())
 404         {
 405           /* Limit the search to the mode required by the corresponding
 406              register insertion or extraction instruction, if any.  */
 407           machine_mode limit_mode = word_mode;
 408           extraction_insn insn;
 409           if (get_best_reg_extraction_insn (&insn, pattern,
 410                                             GET_MODE_BITSIZE (best_mode),
 411                                             fieldmode))
 412             limit_mode = insn.field_mode;
 413
 414           machine_mode wider_mode;
 415           while (iter.next_mode (&wider_mode)
 416                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 417             best_mode = wider_mode;
 418         }
 419       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 420                                    new_bitnum);
 421     }
 422   return NULL_RTX;
 423 }
 424
 425 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 426    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 427    offset is then BITNUM / BITS_PER_UNIT.  */
 428
 429 static bool
 430 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
 431                      unsigned HOST_WIDE_INT bitsize,
 432                      machine_mode struct_mode)
 433 {
 434   if (BYTES_BIG_ENDIAN)
 435     return (bitnum % BITS_PER_UNIT == 0
 436             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
 437                 || (bitnum + bitsize) % BITS_PER_WORD == 0));
 438   else
 439     return bitnum % BITS_PER_WORD == 0;
 440 }
 441
 442 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 443    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 444    Return false if the access would touch memory outside the range
 445    BITREGION_START to BITREGION_END for conformance to the C++ memory
 446    model.  */
 447
 448 static bool
 449 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 450                             unsigned HOST_WIDE_INT bitnum,
 451                             machine_mode fieldmode,
 452                             unsigned HOST_WIDE_INT bitregion_start,
 453                             unsigned HOST_WIDE_INT bitregion_end)
 454 {
 455   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 456
 457   /* -fstrict-volatile-bitfields must be enabled and we must have a
 458      volatile MEM.  */
 459   if (!MEM_P (op0)
 460       || !MEM_VOLATILE_P (op0)
 461       || flag_strict_volatile_bitfields <= 0)
 462     return false;
 463
 464   /* Non-integral modes likely only happen with packed structures.
 465      Punt.  */
 466   if (!SCALAR_INT_MODE_P (fieldmode))
 467     return false;
 468
 469   /* The bit size must not be larger than the field mode, and
 470      the field mode must not be larger than a word.  */
 471   if (bitsize > modesize || modesize > BITS_PER_WORD)
 472     return false;
 473
 474   /* Check for cases of unaligned fields that must be split.  */
 475   if (bitnum % modesize + bitsize > modesize)
 476     return false;
 477
 478   /* The memory must be sufficiently aligned for a MODESIZE access.
 479      This condition guarantees, that the memory access will not
 480      touch anything after the end of the structure.  */
 481   if (MEM_ALIGN (op0) < modesize)
 482     return false;
 483
 484   /* Check for cases where the C++ memory model applies.  */
 485   if (bitregion_end != 0
 486       && (bitnum - bitnum % modesize < bitregion_start
 487           || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
 488     return false;
 489
 490   return true;
 491 }
 492
 493 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 494    bit number BITNUM can be treated as a simple value of mode MODE.  */
 495
 496 static bool
 497 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 498                        unsigned HOST_WIDE_INT bitnum, machine_mode mode)
 499 {
 500   return (MEM_P (op0)
 501           && bitnum % BITS_PER_UNIT == 0
 502           && bitsize == GET_MODE_BITSIZE (mode)
 503           && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
 504               || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
 505                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 506 }
 507 \f
 508 /* Try to use instruction INSV to store VALUE into a field of OP0.
 509    BITSIZE and BITNUM are as for store_bit_field.  */
 510
 511 static bool
 512 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 513                             unsigned HOST_WIDE_INT bitsize,
 514                             unsigned HOST_WIDE_INT bitnum,
 515                             rtx value)
 516 {
 517   struct expand_operand ops[4];
 518   rtx value1;
 519   rtx xop0 = op0;
 520   rtx_insn *last = get_last_insn ();
 521   bool copy_back = false;
 522
 523   machine_mode op_mode = insv->field_mode;
 524   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 525   if (bitsize == 0 || bitsize > unit)
 526     return false;
 527
 528   if (MEM_P (xop0))
 529     /* Get a reference to the first byte of the field.  */
 530     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 531                                  &bitnum);
 532   else
 533     {
 534       /* Convert from counting within OP0 to counting in OP_MODE.  */
 535       if (BYTES_BIG_ENDIAN)
 536         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 537
 538       /* If xop0 is a register, we need it in OP_MODE
 539          to make it acceptable to the format of insv.  */
 540       if (GET_CODE (xop0) == SUBREG)
 541         /* We can't just change the mode, because this might clobber op0,
 542            and we will need the original value of op0 if insv fails.  */
 543         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 544       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 545         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 546     }
 547
 548   /* If the destination is a paradoxical subreg such that we need a
 549      truncate to the inner mode, perform the insertion on a temporary and
 550      truncate the result to the original destination.  Note that we can't
 551      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 552      X) 0)) is (reg:N X).  */
 553   if (GET_CODE (xop0) == SUBREG
 554       && REG_P (SUBREG_REG (xop0))
 555       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 556                                          op_mode))
 557     {
 558       rtx tem = gen_reg_rtx (op_mode);
 559       emit_move_insn (tem, xop0);
 560       xop0 = tem;
 561       copy_back = true;
 562     }
 563
 564   /* There are similar overflow check at the start of store_bit_field_1,
 565      but that only check the situation where the field lies completely
 566      outside the register, while there do have situation where the field
 567      lies partialy in the register, we need to adjust bitsize for this
 568      partial overflow situation.  Without this fix, pr48335-2.c on big-endian
 569      will broken on those arch support bit insert instruction, like arm, aarch64
 570      etc.  */
 571   if (bitsize + bitnum > unit && bitnum < unit)
 572     {
 573       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
 574                "destination object, data truncated into %wu-bit",
 575                bitsize, unit - bitnum);
 576       bitsize = unit - bitnum;
 577     }
 578
 579   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 580      "backwards" from the size of the unit we are inserting into.
 581      Otherwise, we count bits from the most significant on a
 582      BYTES/BITS_BIG_ENDIAN machine.  */
 583
 584   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 585     bitnum = unit - bitsize - bitnum;
 586
 587   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 588   value1 = value;
 589   if (GET_MODE (value) != op_mode)
 590     {
 591       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 592         {
 593           /* Optimization: Don't bother really extending VALUE
 594              if it has all the bits we will actually use.  However,
 595              if we must narrow it, be sure we do it correctly.  */
 596
 597           if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 598             {
 599               rtx tmp;
 600
 601               tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 602               if (! tmp)
 603                 tmp = simplify_gen_subreg (op_mode,
 604                                            force_reg (GET_MODE (value),
 605                                                       value1),
 606                                            GET_MODE (value), 0);
 607               value1 = tmp;
 608             }
 609           else
 610             value1 = gen_lowpart (op_mode, value1);
 611         }
 612       else if (CONST_INT_P (value))
 613         value1 = gen_int_mode (INTVAL (value), op_mode);
 614       else
 615         /* Parse phase is supposed to make VALUE's data type
 616            match that of the component reference, which is a type
 617            at least as wide as the field; so VALUE should have
 618            a mode that corresponds to that type.  */
 619         gcc_assert (CONSTANT_P (value));
 620     }
 621
 622   create_fixed_operand (&ops[0], xop0);
 623   create_integer_operand (&ops[1], bitsize);
 624   create_integer_operand (&ops[2], bitnum);
 625   create_input_operand (&ops[3], value1, op_mode);
 626   if (maybe_expand_insn (insv->icode, 4, ops))
 627     {
 628       if (copy_back)
 629         convert_move (op0, xop0, true);
 630       return true;
 631     }
 632   delete_insns_since (last);
 633   return false;
 634 }
 635
 636 /* A subroutine of store_bit_field, with the same arguments.  Return true
 637    if the operation could be implemented.
 638
 639    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 640    no other way of implementing the operation.  If FALLBACK_P is false,
 641    return false instead.  */
 642
 643 static bool
 644 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 645                    unsigned HOST_WIDE_INT bitnum,
 646                    unsigned HOST_WIDE_INT bitregion_start,
 647                    unsigned HOST_WIDE_INT bitregion_end,
 648                    machine_mode fieldmode,
 649                    rtx value, bool fallback_p)
 650 {
 651   rtx op0 = str_rtx;
 652   rtx orig_value;
 653
 654   while (GET_CODE (op0) == SUBREG)
 655     {
 656       /* The following line once was done only if WORDS_BIG_ENDIAN,
 657          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 658          meaningful at a much higher level; when structures are copied
 659          between memory and regs, the higher-numbered regs
 660          always get higher addresses.  */
 661       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 662       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 663       int byte_offset = 0;
 664
 665       /* Paradoxical subregs need special handling on big endian machines.  */
 666       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 667         {
 668           int difference = inner_mode_size - outer_mode_size;
 669
 670           if (WORDS_BIG_ENDIAN)
 671             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 672           if (BYTES_BIG_ENDIAN)
 673             byte_offset += difference % UNITS_PER_WORD;
 674         }
 675       else
 676         byte_offset = SUBREG_BYTE (op0);
 677
 678       bitnum += byte_offset * BITS_PER_UNIT;
 679       op0 = SUBREG_REG (op0);
 680     }
 681
 682   /* No action is needed if the target is a register and if the field
 683      lies completely outside that register.  This can occur if the source
 684      code contains an out-of-bounds access to a small array.  */
 685   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 686     return true;
 687
 688   /* Use vec_set patterns for inserting parts of vectors whenever
 689      available.  */
 690   if (VECTOR_MODE_P (GET_MODE (op0))
 691       && !MEM_P (op0)
 692       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 693       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 694       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 695       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 696     {
 697       struct expand_operand ops[3];
 698       machine_mode outermode = GET_MODE (op0);
 699       machine_mode innermode = GET_MODE_INNER (outermode);
 700       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 701       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 702
 703       create_fixed_operand (&ops[0], op0);
 704       create_input_operand (&ops[1], value, innermode);
 705       create_integer_operand (&ops[2], pos);
 706       if (maybe_expand_insn (icode, 3, ops))
 707         return true;
 708     }
 709
 710   /* If the target is a register, overwriting the entire object, or storing
 711      a full-word or multi-word field can be done with just a SUBREG.  */
 712   if (!MEM_P (op0)
 713       && bitsize == GET_MODE_BITSIZE (fieldmode)
 714       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 715           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 716     {
 717       /* Use the subreg machinery either to narrow OP0 to the required
 718          words or to cope with mode punning between equal-sized modes.
 719          In the latter case, use subreg on the rhs side, not lhs.  */
 720       rtx sub;
 721
 722       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 723         {
 724           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 725           if (sub)
 726             {
 727               emit_move_insn (op0, sub);
 728               return true;
 729             }
 730         }
 731       else
 732         {
 733           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 734                                      bitnum / BITS_PER_UNIT);
 735           if (sub)
 736             {
 737               emit_move_insn (sub, value);
 738               return true;
 739             }
 740         }
 741     }
 742
 743   /* If the target is memory, storing any naturally aligned field can be
 744      done with a simple store.  For targets that support fast unaligned
 745      memory, any naturally sized, unit aligned field can be done directly.  */
 746   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
 747     {
 748       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 749       emit_move_insn (op0, value);
 750       return true;
 751     }
 752
 753   /* Make sure we are playing with integral modes.  Pun with subregs
 754      if we aren't.  This must come after the entire register case above,
 755      since that case is valid for any mode.  The following cases are only
 756      valid for integral modes.  */
 757   {
 758     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 759     if (imode != GET_MODE (op0))
 760       {
 761         if (MEM_P (op0))
 762           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
 763         else
 764           {
 765             gcc_assert (imode != BLKmode);
 766             op0 = gen_lowpart (imode, op0);
 767           }
 768       }
 769   }
 770
 771   /* Storing an lsb-aligned field in a register
 772      can be done with a movstrict instruction.  */
 773
 774   if (!MEM_P (op0)
 775       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 776       && bitsize == GET_MODE_BITSIZE (fieldmode)
 777       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 778     {
 779       struct expand_operand ops[2];
 780       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 781       rtx arg0 = op0;
 782       unsigned HOST_WIDE_INT subreg_off;
 783
 784       if (GET_CODE (arg0) == SUBREG)
 785         {
 786           /* Else we've got some float mode source being extracted into
 787              a different float mode destination -- this combination of
 788              subregs results in Severe Tire Damage.  */
 789           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 790                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 791                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 792           arg0 = SUBREG_REG (arg0);
 793         }
 794
 795       subreg_off = bitnum / BITS_PER_UNIT;
 796       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 797         {
 798           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 799
 800           create_fixed_operand (&ops[0], arg0);
 801           /* Shrink the source operand to FIELDMODE.  */
 802           create_convert_operand_to (&ops[1], value, fieldmode, false);
 803           if (maybe_expand_insn (icode, 2, ops))
 804             return true;
 805         }
 806     }
 807
 808   /* Handle fields bigger than a word.  */
 809
 810   if (bitsize > BITS_PER_WORD)
 811     {
 812       /* Here we transfer the words of the field
 813          in the order least significant first.
 814          This is because the most significant word is the one which may
 815          be less than full.
 816          However, only do that if the value is not BLKmode.  */
 817
 818       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 819       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 820       unsigned int i;
 821       rtx_insn *last;
 822
 823       /* This is the mode we must force value to, so that there will be enough
 824          subwords to extract.  Note that fieldmode will often (always?) be
 825          VOIDmode, because that is what store_field uses to indicate that this
 826          is a bit field, but passing VOIDmode to operand_subword_force
 827          is not allowed.  */
 828       fieldmode = GET_MODE (value);
 829       if (fieldmode == VOIDmode)
 830         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 831
 832       last = get_last_insn ();
 833       for (i = 0; i < nwords; i++)
 834         {
 835           /* If I is 0, use the low-order word in both field and target;
 836              if I is 1, use the next to lowest word; and so on.  */
 837           unsigned int wordnum = (backwards
 838                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 839                                   - i - 1
 840                                   : i);
 841           unsigned int bit_offset = (backwards
 842                                      ? MAX ((int) bitsize - ((int) i + 1)
 843                                             * BITS_PER_WORD,
 844                                             0)
 845                                      : (int) i * BITS_PER_WORD);
 846           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 847           unsigned HOST_WIDE_INT new_bitsize =
 848             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 849
 850           /* If the remaining chunk doesn't have full wordsize we have
 851              to make sure that for big endian machines the higher order
 852              bits are used.  */
 853           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 854             value_word = simplify_expand_binop (word_mode, lshr_optab,
 855                                                 value_word,
 856                                                 GEN_INT (BITS_PER_WORD
 857                                                          - new_bitsize),
 858                                                 NULL_RTX, true,
 859                                                 OPTAB_LIB_WIDEN);
 860
 861           if (!store_bit_field_1 (op0, new_bitsize,
 862                                   bitnum + bit_offset,
 863                                   bitregion_start, bitregion_end,
 864                                   word_mode,
 865                                   value_word, fallback_p))
 866             {
 867               delete_insns_since (last);
 868               return false;
 869             }
 870         }
 871       return true;
 872     }
 873
 874   /* If VALUE has a floating-point or complex mode, access it as an
 875      integer of the corresponding size.  This can occur on a machine
 876      with 64 bit registers that uses SFmode for float.  It can also
 877      occur for unaligned float or complex fields.  */
 878   orig_value = value;
 879   if (GET_MODE (value) != VOIDmode
 880       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 881       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 882     {
 883       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 884       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 885     }
 886
 887   /* If OP0 is a multi-word register, narrow it to the affected word.
 888      If the region spans two words, defer to store_split_bit_field.  */
 889   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 890     {
 891       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 892                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 893       gcc_assert (op0);
 894       bitnum %= BITS_PER_WORD;
 895       if (bitnum + bitsize > BITS_PER_WORD)
 896         {
 897           if (!fallback_p)
 898             return false;
 899
 900           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 901                                  bitregion_end, value);
 902           return true;
 903         }
 904     }
 905
 906   /* From here on we can assume that the field to be stored in fits
 907      within a word.  If the destination is a register, it too fits
 908      in a word.  */
 909
 910   extraction_insn insv;
 911   if (!MEM_P (op0)
 912       && get_best_reg_extraction_insn (&insv, EP_insv,
 913                                        GET_MODE_BITSIZE (GET_MODE (op0)),
 914                                        fieldmode)
 915       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 916     return true;
 917
 918   /* If OP0 is a memory, try copying it to a register and seeing if a
 919      cheap register alternative is available.  */
 920   if (MEM_P (op0))
 921     {
 922       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
 923                                         fieldmode)
 924           && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 925         return true;
 926
 927       rtx_insn *last = get_last_insn ();
 928
 929       /* Try loading part of OP0 into a register, inserting the bitfield
 930          into that, and then copying the result back to OP0.  */
 931       unsigned HOST_WIDE_INT bitpos;
 932       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
 933                                                bitregion_start, bitregion_end,
 934                                                fieldmode, &bitpos);
 935       if (xop0)
 936         {
 937           rtx tempreg = copy_to_reg (xop0);
 938           if (store_bit_field_1 (tempreg, bitsize, bitpos,
 939                                  bitregion_start, bitregion_end,
 940                                  fieldmode, orig_value, false))
 941             {
 942               emit_move_insn (xop0, tempreg);
 943               return true;
 944             }
 945           delete_insns_since (last);
 946         }
 947     }
 948
 949   if (!fallback_p)
 950     return false;
 951
 952   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
 953                          bitregion_end, value);
 954   return true;
 955 }
 956
 957 /* Generate code to store value from rtx VALUE
 958    into a bit-field within structure STR_RTX
 959    containing BITSIZE bits starting at bit BITNUM.
 960
 961    BITREGION_START is bitpos of the first bitfield in this region.
 962    BITREGION_END is the bitpos of the ending bitfield in this region.
 963    These two fields are 0, if the C++ memory model does not apply,
 964    or we are not interested in keeping track of bitfield regions.
 965
 966    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.

        Nothing is returned.  Accesses accepted by strict_volatile_bitfield_p
        are expanded entirely in this function.  Everything else is handed to
        store_bit_field_1 with its FALLBACK_P argument set to true, and a
        false return from that call is treated as unreachable.  */
 967
 968 void
 969 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 970                  unsigned HOST_WIDE_INT bitnum,
 971                  unsigned HOST_WIDE_INT bitregion_start,
 972                  unsigned HOST_WIDE_INT bitregion_end,
 973                  machine_mode fieldmode,
 974                  rtx value)
 975 {
 976   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
 977   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
 978                                   bitregion_start, bitregion_end))
 979     {
 980       /* Storing of a full word can be done with a simple store.
 981          We know here that the field can be accessed with one single
 982          instruction.  For targets that support unaligned memory,
 983          an unaligned access may be necessary.  */
 984       if (bitsize == GET_MODE_BITSIZE (fieldmode))
 985         {
 986           str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
 987                                              bitnum / BITS_PER_UNIT);
               /* The byte offset used above is only exact if the field
                  starts on a byte boundary.  */
 988           gcc_assert (bitnum % BITS_PER_UNIT == 0);
 989           emit_move_insn (str_rtx, value);
 990         }
 991       else
 992         {
 993           rtx temp;
 994
               /* The field is narrower than FIELDMODE, so do a
                  read-modify-write: narrow_bit_field_mem returns the MEM
                  to access and updates BITNUM through its last argument;
                  that MEM is copied into a register, the field is stored
                  into the register, and the register is written back.  */
 995           str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
 996                                           &bitnum);
 997           gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
 998           temp = copy_to_reg (str_rtx);
               /* The store goes into the register TEMP, and the bit region
                  is passed as 0/0 here.  */
 999           if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1000                                   fieldmode, value, true))
1001             gcc_unreachable ();
1002
1003           emit_move_insn (str_rtx, temp);
1004         }
1005
1006       return;
1007     }
1008
1009   /* Under the C++0x memory model, we must not touch bits outside the
1010      bit region.  Adjust the address to start at the beginning of the
1011      bit region.  */
1012   if (MEM_P (str_rtx) && bitregion_start > 0)
1013     {
1014       machine_mode bestmode;
1015       HOST_WIDE_INT offset, size;
1016
1017       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1018
           /* OFFSET is the byte offset of the region start.  BITNUM and
              BITREGION_END are rebased so that they are relative to the
              region start, which itself becomes 0.  SIZE is the number of
              bytes from the region start through the last bit of the
              field, rounded up.  */
1019       offset = bitregion_start / BITS_PER_UNIT;
1020       bitnum -= bitregion_start;
1021       size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1022       bitregion_end -= bitregion_start;
1023       bitregion_start = 0;
1024       bestmode = get_best_mode (bitsize, bitnum,
1025                                 bitregion_start, bitregion_end,
1026                                 MEM_ALIGN (str_rtx), VOIDmode,
1027                                 MEM_VOLATILE_P (str_rtx));
1028       str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1029     }
1030
       /* General case.  FALLBACK_P is true, so a false return is not
          expected.  */
1031   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1032                           bitregion_start, bitregion_end,
1033                           fieldmode, value, true))
1034     gcc_unreachable ();
1035 }
1036 \f
1037 /* Use shifts and boolean operations to store VALUE into a bit field of
1038    width BITSIZE in OP0, starting at bit BITNUM.

        BITREGION_START and BITREGION_END are forwarded to get_best_mode and
        to store_split_bit_field.  When OP0 is a MEM, an access mode is
        chosen first and OP0 is narrowed to it; any other OP0 is passed
        unchanged to store_fixed_bit_field_1.  Nothing is returned.  */
1039
1040 static void
1041 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1042                        unsigned HOST_WIDE_INT bitnum,
1043                        unsigned HOST_WIDE_INT bitregion_start,
1044                        unsigned HOST_WIDE_INT bitregion_end,
1045                        rtx value)
1046 {
1047   /* There is a case not handled here:
1048      a structure with a known alignment of just a halfword
1049      and a field split across two aligned halfwords within the structure.
1050      Or likewise a structure with a known alignment of just a byte
1051      and a field split across two bytes.
1052      Such cases are not supposed to be able to occur.  */
1053
1054   if (MEM_P (op0))
1055     {
1056       machine_mode mode = GET_MODE (op0);
           /* Use word_mode as the mode passed to get_best_mode when OP0's
              own mode has zero size or is wider than a word.  */
1057       if (GET_MODE_BITSIZE (mode) == 0
1058           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1059         mode = word_mode;
1060       mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1061                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1062
1063       if (mode == VOIDmode)
1064         {
1065           /* The only way this should occur is if the field spans word
1066              boundaries.  */
1067           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1068                                  bitregion_end, value);
1069           return;
1070         }
1071
           /* narrow_bit_field_mem returns the MEM to access in MODE and
              updates BITNUM through its last argument.  */
1072       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1073     }
1074
1075   store_fixed_bit_field_1 (op0, bitsize, bitnum, value);
1076 }
1077
1078 /* Helper function for store_fixed_bit_field, stores
1079    the bit field always using the MODE of OP0.

        OP0 must have a scalar integer mode (asserted below).  VALUE is
        positioned at the field's offset, the field's bits in OP0 are
        cleared, the two are ORed together and the result is moved back
        into OP0.  The clearing is skipped when VALUE is a CONST_INT whose
        low BITSIZE bits are all ones, and the OR is skipped when they are
        all zeros.  Nothing is returned.  */
1080
1081 static void
1082 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1083                          unsigned HOST_WIDE_INT bitnum,
1084                          rtx value)
1085 {
1086   machine_mode mode;
1087   rtx temp;
       /* These flags are only ever set for a CONST_INT VALUE: they record
          whether its low BITSIZE bits are all zeros or all ones.  */
1088   int all_zero = 0;
1089   int all_one = 0;
1090
1091   mode = GET_MODE (op0);
1092   gcc_assert (SCALAR_INT_MODE_P (mode));
1093
1094   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1095      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1096
1097   if (BYTES_BIG_ENDIAN)
1098     /* BITNUM is the distance between our msb
1099        and that of the containing datum.
1100        Convert it to the distance from the lsb.  */
1101     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1102
1103   /* Now BITNUM is always the distance between our lsb
1104      and that of OP0.  */
1105
1106   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1107      we must first convert its mode to MODE.  */
1108
1109   if (CONST_INT_P (value))
1110     {
1111       unsigned HOST_WIDE_INT v = UINTVAL (value);
1112
           /* Keep only the low BITSIZE bits of V.  The guard avoids a
              shift by the full width of HOST_WIDE_INT.  */
1113       if (bitsize < HOST_BITS_PER_WIDE_INT)
1114         v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;
1115
1116       if (v == 0)
1117         all_zero = 1;
1118       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1119                 && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
1120                || (bitsize == HOST_BITS_PER_WIDE_INT
1121                    && v == (unsigned HOST_WIDE_INT) -1))
1122         all_one = 1;
1123
1124       value = lshift_value (mode, v, bitnum);
1125     }
1126   else
1127     {
           /* The explicit mask is skipped when VALUE's mode is exactly
              BITSIZE bits wide, or when the field extends up to the most
              significant bit of MODE.
              NOTE(review): presumably the conversion to MODE, respectively
              the left shift, already leaves no unwanted high bits in those
              two cases -- confirm.  */
1128       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1129                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1130
1131       if (GET_MODE (value) != mode)
1132         value = convert_to_mode (mode, value, 1);
1133
1134       if (must_and)
1135         value = expand_binop (mode, and_optab, value,
1136                               mask_rtx (mode, 0, bitsize, 0),
1137                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1138       if (bitnum > 0)
1139         value = expand_shift (LSHIFT_EXPR, mode, value,
1140                               bitnum, NULL_RTX, 1);
1141     }
1142
1143   /* Now clear the chosen bits in OP0,
1144      except that if VALUE is -1 we need not bother.  */
1145   /* We keep the intermediates in registers to allow CSE to combine
1146      consecutive bitfield assignments.  */
1147
1148   temp = force_reg (mode, op0);
1149
1150   if (! all_one)
1151     {
1152       temp = expand_binop (mode, and_optab, temp,
1153                            mask_rtx (mode, bitnum, bitsize, 1),
1154                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1155       temp = force_reg (mode, temp);
1156     }
1157
1158   /* Now logical-or VALUE into OP0, unless it is zero.  */
1159
1160   if (! all_zero)
1161     {
1162       temp = expand_binop (mode, ior_optab, temp, value,
1163                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1164       temp = force_reg (mode, temp);
1165     }
1166
       /* Write the combined result back, using a copy of the OP0 rtx as
          the destination.  Nothing is emitted if TEMP is OP0 itself.  */
1167   if (op0 != temp)
1168     {
1169       op0 = copy_rtx (op0);
1170       emit_move_insn (op0, temp);
1171     }
1172 }
1173 \f
1174 /* Store a bit field that is split across multiple accessible memory objects.
1175
1176    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1177    BITSIZE is the field width; BITPOS the position of its first bit
1178    (within the word).
1179    VALUE is the value to store.
        BITREGION_START and BITREGION_END are passed on to
        store_fixed_bit_field; in addition, a nonzero BITREGION_END limits
        the size of the pieces used when OP0 is not a REG or a SUBREG of
        a REG.

        The field is stored piece by piece, each piece going through
        store_fixed_bit_field.  Nothing is returned.
1180
1181    This does not yet handle fields wider than BITS_PER_WORD.  */
1182
1183 static void
1184 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1185                        unsigned HOST_WIDE_INT bitpos,
1186                        unsigned HOST_WIDE_INT bitregion_start,
1187                        unsigned HOST_WIDE_INT bitregion_end,
1188                        rtx value)
1189 {
       /* UNIT is the width in bits of the chunks the destination is
          divided into; BITSDONE counts the bits of the field that have
          been stored so far.  */
1190   unsigned int unit;
1191   unsigned int bitsdone = 0;
1192
1193   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1194      much at a time.  */
1195   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1196     unit = BITS_PER_WORD;
1197   else
1198     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1199
1200   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1201      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1202      again, and we will mutually recurse forever.  */
1203   if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1204     unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1205
1206   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1207      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1208      that VALUE might be a floating-point constant.  */
1209   if (CONSTANT_P (value) && !CONST_INT_P (value))
1210     {
1211       rtx word = gen_lowpart_common (word_mode, value);
1212
1213       if (word && (value != word))
1214         value = word;
1215       else
1216         value = gen_lowpart_common (word_mode,
1217                                     force_reg (GET_MODE (value) != VOIDmode
1218                                                ? GET_MODE (value)
1219                                                : word_mode, value));
1220     }
1221
1222   while (bitsdone < bitsize)
1223     {
1224       unsigned HOST_WIDE_INT thissize;
1225       rtx part, word;
1226       unsigned HOST_WIDE_INT thispos;
1227       unsigned HOST_WIDE_INT offset;
1228
           /* OFFSET is the number of whole UNITs that precede the current
              piece; THISPOS is the bit position of the piece within its
              UNIT.  */
1229       offset = (bitpos + bitsdone) / unit;
1230       thispos = (bitpos + bitsdone) % unit;
1231
1232       /* When region of bytes we can touch is restricted, decrease
1233          UNIT close to the end of the region as needed.  If op0 is a REG
1234          or SUBREG of REG, don't do this, as there can't be data races
1235          on a register and we can expand shorter code in some cases.  */
1236       if (bitregion_end
1237           && unit > BITS_PER_UNIT
1238           && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1239           && !REG_P (op0)
1240           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1241         {
               /* Retry the same piece with half the UNIT; BITSDONE is
                  unchanged, so OFFSET and THISPOS are recomputed.  */
1242           unit = unit / 2;
1243           continue;
1244         }
1245
1246       /* THISSIZE must not overrun a word boundary.  Otherwise,
1247          store_fixed_bit_field will call us again, and we will mutually
1248          recurse forever.  */
1249       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1250       thissize = MIN (thissize, unit - thispos);
1251
           /* PART becomes the THISSIZE bits of VALUE that belong in this
              piece: computed directly for a CONST_INT, extracted with
              extract_fixed_bit_field otherwise.  */
1252       if (BYTES_BIG_ENDIAN)
1253         {
1254           /* Fetch successively less significant portions.  */
1255           if (CONST_INT_P (value))
1256             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1257                              >> (bitsize - bitsdone - thissize))
1258                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1259           else
1260             {
1261               int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1262               /* The args are chosen so that the last part includes the
1263                  lsb.  Give extract_bit_field the value it needs (with
1264                  endianness compensation) to fetch the piece we want.  */
1265               part = extract_fixed_bit_field (word_mode, value, thissize,
1266                                               total_bits - bitsize + bitsdone,
1267                                               NULL_RTX, 1);
1268             }
1269         }
1270       else
1271         {
1272           /* Fetch successively more significant portions.  */
1273           if (CONST_INT_P (value))
1274             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1275                              >> bitsdone)
1276                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1277           else
1278             part = extract_fixed_bit_field (word_mode, value, thissize,
1279                                             bitsdone, NULL_RTX, 1);
1280         }
1281
1282       /* If OP0 is a register, then handle OFFSET here.
1283
1284          When handling multiword bitfields, extract_bit_field may pass
1285          down a word_mode SUBREG of a larger REG for a bitfield that actually
1286          crosses a word boundary.  Thus, for a SUBREG, we must find
1287          the current word starting from the base register.  */
1288       if (GET_CODE (op0) == SUBREG)
1289         {
1290           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1291                             + (offset * unit / BITS_PER_WORD);
1292           machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
               /* If the inner register is narrower than a word, word 0 is
                  OP0 itself and any other word index is marked with
                  const0_rtx, which is skipped further down.  */
1293           if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1294             word = word_offset ? const0_rtx : op0;
1295           else
1296             word = operand_subword_force (SUBREG_REG (op0), word_offset,
1297                                           GET_MODE (SUBREG_REG (op0)));
               /* Keep only the part of OFFSET that lies within the selected
                  word.  NOTE(review): this masking assumes that
                  BITS_PER_WORD / UNIT is a power of two -- confirm.  */
1298           offset &= BITS_PER_WORD / unit - 1;
1299         }
1300       else if (REG_P (op0))
1301         {
1302           machine_mode op0_mode = GET_MODE (op0);
               /* Same treatment as for a SUBREG above, but indexing words
                  of OP0 directly.  */
1303           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1304             word = offset ? const0_rtx : op0;
1305           else
1306             word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1307                                           GET_MODE (op0));
1308           offset &= BITS_PER_WORD / unit - 1;
1309         }
1310       else
1311         word = op0;
1312
1313       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1314          it is just an out-of-bounds access.  Ignore it.  */
1315       if (word != const0_rtx)
1316         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1317                                bitregion_start, bitregion_end, part);
1318       bitsdone += thissize;
1319     }
1320 }
1321 \f
1322 /* A subroutine of extract_bit_field_1 that converts return value X
1323    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1324    to extract_bit_field.

        X is returned unchanged if it already has mode TMODE or MODE;
        otherwise the result has mode TMODE.  */
1325
1326 static rtx
1327 convert_extracted_bit_field (rtx x, machine_mode mode,
1328                              machine_mode tmode, bool unsignedp)
1329 {
1330   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1331     return x;
1332
1333   /* If TMODE is not a scalar integral mode, first convert X to the
1334      integer mode that has TMODE's size, force that into a register,
1335      and then access the register in TMODE via gen_lowpart.  */
1336   if (!SCALAR_INT_MODE_P (tmode))
1337     {
1338       machine_mode smode;
1339
1340       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1341       x = convert_to_mode (smode, x, unsignedp);
1342       x = force_reg (smode, x);
1343       return gen_lowpart (tmode, x);
1344     }
1345
       /* TMODE is a scalar integral mode: an ordinary mode conversion is
          enough.  */
1346   return convert_to_mode (tmode, x, unsignedp);
1347 }
1348
1349 /* Try to use an ext(z)v pattern to extract a field from OP0.
1350    Return the extracted value on success, otherwise return null.
1351    EXTV describes the pattern to use; its field_mode is the mode of
1352    the extraction (EXT_MODE below).  The other arguments are as for
        extract_bit_field.

        Null is returned without emitting the pattern when BITSIZE is zero
        or larger than EXT_MODE, and when OP0 is a SUBREG whose mode is not
        EXT_MODE.  */
1353
1354 static rtx
1355 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1356                               unsigned HOST_WIDE_INT bitsize,
1357                               unsigned HOST_WIDE_INT bitnum,
1358                               int unsignedp, rtx target,
1359                               machine_mode mode, machine_mode tmode)
1360 {
1361   struct expand_operand ops[4];
       /* SPEC_TARGET is the target the caller would like to see returned
          (TARGET as passed in, or the fresh TMODE register created below).
          SPEC_TARGET_SUBREG is set when the insn is instead given a wider
          EXT_MODE lowpart of that target.  Both are compared against the
          insn's actual output at the end.  */
1362   rtx spec_target = target;
1363   rtx spec_target_subreg = 0;
1364   machine_mode ext_mode = extv->field_mode;
       /* Number of bits in the extraction mode.  */
1365   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1366
1367   if (bitsize == 0 || unit < bitsize)
1368     return NULL_RTX;
1369
1370   if (MEM_P (op0))
1371     /* Get a reference to the first byte of the field.  */
1372     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1373                                 &bitnum);
1374   else
1375     {
1376       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1377       if (BYTES_BIG_ENDIAN)
1378         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1379
1380       /* If op0 is a register, we need it in EXT_MODE to make it
1381          acceptable to the format of ext(z)v.  */
1382       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1383         return NULL_RTX;
1384       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1385         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1386     }
1387
1388   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1389      "backwards" from the size of the unit we are extracting from.
1390      Otherwise, we count bits from the most significant on a
1391      BYTES/BITS_BIG_ENDIAN machine.  */
1392
1393   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1394     bitnum = unit - bitsize - bitnum;
1395
       /* With no target supplied, extract into a new TMODE register.  */
1396   if (target == 0)
1397     target = spec_target = gen_reg_rtx (tmode);
1398
1399   if (GET_MODE (target) != ext_mode)
1400     {
1401       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1402          between the mode of the extraction (word_mode) and the target
1403          mode.  Instead, create a temporary and use convert_move to set
1404          the target.  */
1405       if (REG_P (target)
1406           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1407         {
1408           target = gen_lowpart (ext_mode, target);
               /* Remember the lowpart only when EXT_MODE is wider than the
                  original target's mode.  */
1409           if (GET_MODE_PRECISION (ext_mode)
1410               > GET_MODE_PRECISION (GET_MODE (spec_target)))
1411             spec_target_subreg = target;
1412         }
1413       else
1414         target = gen_reg_rtx (ext_mode);
1415     }
1416
       /* Operands: 0 is the output in EXT_MODE, 1 the source, 2 the field
          width and 3 the bit position.  */
1417   create_output_operand (&ops[0], target, ext_mode);
1418   create_fixed_operand (&ops[1], op0);
1419   create_integer_operand (&ops[2], bitsize);
1420   create_integer_operand (&ops[3], bitnum);
1421   if (maybe_expand_insn (extv->icode, 4, ops))
1422     {
           /* The result is read back from ops[0].value.  Return the
              original target if the insn wrote it (directly or through
              the remembered lowpart); otherwise convert the result.  */
1423       target = ops[0].value;
1424       if (target == spec_target)
1425         return target;
1426       if (target == spec_target_subreg)
1427         return spec_target;
1428       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1429     }
1430   return NULL_RTX;
1431 }
1432
1433 /* A subroutine of extract_bit_field, with the same arguments.
1434    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1435    if we can find no other means of implementing the operation.
 1436    If FALLBACK_P is false, return NULL instead.  */
1437
1438 static rtx
1439 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1440                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1441                      machine_mode mode, machine_mode tmode,
1442                      bool fallback_p)
1443 {
1444   rtx op0 = str_rtx;
1445   machine_mode int_mode;
1446   machine_mode mode1;
1447
1448   if (tmode == VOIDmode)
1449     tmode = mode;
1450
1451   while (GET_CODE (op0) == SUBREG)
1452     {
1453       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1454       op0 = SUBREG_REG (op0);
1455     }
1456
1457   /* If we have an out-of-bounds access to a register, just return an
1458      uninitialized register of the required mode.  This can occur if the
1459      source code contains an out-of-bounds access to a small array.  */
1460   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1461     return gen_reg_rtx (tmode);
1462
1463   if (REG_P (op0)
1464       && mode == GET_MODE (op0)
1465       && bitnum == 0
1466       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1467     {
1468       /* We're trying to extract a full register from itself.  */
1469       return op0;
1470     }
1471
1472   /* See if we can get a better vector mode before extracting.  */
1473   if (VECTOR_MODE_P (GET_MODE (op0))
1474       && !MEM_P (op0)
1475       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1476     {
1477       machine_mode new_mode;
1478
1479       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1480         new_mode = MIN_MODE_VECTOR_FLOAT;
1481       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1482         new_mode = MIN_MODE_VECTOR_FRACT;
1483       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1484         new_mode = MIN_MODE_VECTOR_UFRACT;
1485       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1486         new_mode = MIN_MODE_VECTOR_ACCUM;
1487       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1488         new_mode = MIN_MODE_VECTOR_UACCUM;
1489       else
1490         new_mode = MIN_MODE_VECTOR_INT;
1491
1492       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1493         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1494             && targetm.vector_mode_supported_p (new_mode))
1495           break;
1496       if (new_mode != VOIDmode)
1497         op0 = gen_lowpart (new_mode, op0);
1498     }
1499
1500   /* Use vec_extract patterns for extracting parts of vectors whenever
1501      available.  */
1502   if (VECTOR_MODE_P (GET_MODE (op0))
1503       && !MEM_P (op0)
1504       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1505       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1506           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1507     {
1508       struct expand_operand ops[3];
1509       machine_mode outermode = GET_MODE (op0);
1510       machine_mode innermode = GET_MODE_INNER (outermode);
1511       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1512       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1513
1514       create_output_operand (&ops[0], target, innermode);
1515       create_input_operand (&ops[1], op0, outermode);
1516       create_integer_operand (&ops[2], pos);
1517       if (maybe_expand_insn (icode, 3, ops))
1518         {
1519           target = ops[0].value;
1520           if (GET_MODE (target) != mode)
1521             return gen_lowpart (tmode, target);
1522           return target;
1523         }
1524     }
1525
1526   /* Make sure we are playing with integral modes.  Pun with subregs
1527      if we aren't.  */
1528   {
1529     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1530     if (imode != GET_MODE (op0))
1531       {
1532         if (MEM_P (op0))
1533           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1534         else if (imode != BLKmode)
1535           {
1536             op0 = gen_lowpart (imode, op0);
1537
1538             /* If we got a SUBREG, force it into a register since we
1539                aren't going to be able to do another SUBREG on it.  */
1540             if (GET_CODE (op0) == SUBREG)
1541               op0 = force_reg (imode, op0);
1542           }
1543         else if (REG_P (op0))
1544           {
1545             rtx reg, subreg;
1546             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1547                                             MODE_INT);
1548             reg = gen_reg_rtx (imode);
1549             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1550             emit_move_insn (subreg, op0);
1551             op0 = reg;
1552             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1553           }
1554         else
1555           {
1556             HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1557             rtx mem = assign_stack_temp (GET_MODE (op0), size);
1558             emit_move_insn (mem, op0);
1559             op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1560           }
1561       }
1562   }
1563
1564   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1565      If that's wrong, the solution is to test for it and set TARGET to 0
1566      if needed.  */
1567
1568   /* Get the mode of the field to use for atomic access or subreg
1569      conversion.  */
1570   mode1 = mode;
1571   if (SCALAR_INT_MODE_P (tmode))
1572     {
1573       machine_mode try_mode = mode_for_size (bitsize,
1574                                                   GET_MODE_CLASS (tmode), 0);
1575       if (try_mode != BLKmode)
1576         mode1 = try_mode;
1577     }
1578   gcc_assert (mode1 != BLKmode);
1579
1580   /* Extraction of a full MODE1 value can be done with a subreg as long
1581      as the least significant bit of the value is the least significant
1582      bit of either OP0 or a word of OP0.  */
1583   if (!MEM_P (op0)
1584       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1585       && bitsize == GET_MODE_BITSIZE (mode1)
1586       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1587     {
1588       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1589                                      bitnum / BITS_PER_UNIT);
1590       if (sub)
1591         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1592     }
1593
1594   /* Extraction of a full MODE1 value can be done with a load as long as
1595      the field is on a byte boundary and is sufficiently aligned.  */
1596   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1597     {
1598       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1599       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1600     }
1601
1602   /* Handle fields bigger than a word.  */
1603
1604   if (bitsize > BITS_PER_WORD)
1605     {
1606       /* Here we transfer the words of the field
1607          in the order least significant first.
1608          This is because the most significant word is the one which may
1609          be less than full.  */
1610
1611       unsigned int backwards = WORDS_BIG_ENDIAN;
1612       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1613       unsigned int i;
1614       rtx_insn *last;
1615
1616       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1617         target = gen_reg_rtx (mode);
1618
1619       /* In case we're about to clobber a base register or something
1620          (see gcc.c-torture/execute/20040625-1.c).   */
1621       if (reg_mentioned_p (target, str_rtx))
1622         target = gen_reg_rtx (mode);
1623
1624       /* Indicate for flow that the entire target reg is being set.  */
1625       emit_clobber (target);
1626
1627       last = get_last_insn ();
1628       for (i = 0; i < nwords; i++)
1629         {
1630           /* If I is 0, use the low-order word in both field and target;
1631              if I is 1, use the next to lowest word; and so on.  */
1632           /* Word number in TARGET to use.  */
1633           unsigned int wordnum
1634             = (backwards
1635                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1636                : i);
1637           /* Offset from start of field in OP0.  */
1638           unsigned int bit_offset = (backwards
1639                                      ? MAX ((int) bitsize - ((int) i + 1)
1640                                             * BITS_PER_WORD,
1641                                             0)
1642                                      : (int) i * BITS_PER_WORD);
1643           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1644           rtx result_part
1645             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1646                                              bitsize - i * BITS_PER_WORD),
1647                                    bitnum + bit_offset, 1, target_part,
1648                                    mode, word_mode, fallback_p);
1649
1650           gcc_assert (target_part);
1651           if (!result_part)
1652             {
1653               delete_insns_since (last);
1654               return NULL;
1655             }
1656
1657           if (result_part != target_part)
1658             emit_move_insn (target_part, result_part);
1659         }
1660
1661       if (unsignedp)
1662         {
1663           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1664              need to be zero'd out.  */
1665           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1666             {
1667               unsigned int i, total_words;
1668
1669               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1670               for (i = nwords; i < total_words; i++)
1671                 emit_move_insn
1672                   (operand_subword (target,
1673                                     backwards ? total_words - i - 1 : i,
1674                                     1, VOIDmode),
1675                    const0_rtx);
1676             }
1677           return target;
1678         }
1679
1680       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1681       target = expand_shift (LSHIFT_EXPR, mode, target,
1682                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1683       return expand_shift (RSHIFT_EXPR, mode, target,
1684                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1685     }
1686
1687   /* If OP0 is a multi-word register, narrow it to the affected word.
1688      If the region spans two words, defer to extract_split_bit_field.  */
1689   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1690     {
1691       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1692                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1693       bitnum %= BITS_PER_WORD;
1694       if (bitnum + bitsize > BITS_PER_WORD)
1695         {
1696           if (!fallback_p)
1697             return NULL_RTX;
1698           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1699           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1700         }
1701     }
1702
1703   /* From here on we know the desired field is smaller than a word.
1704      If OP0 is a register, it too fits within a word.  */
1705   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1706   extraction_insn extv;
1707   if (!MEM_P (op0)
1708       /* ??? We could limit the structure size to the part of OP0 that
1709          contains the field, with appropriate checks for endianness
1710          and TRULY_NOOP_TRUNCATION.  */
1711       && get_best_reg_extraction_insn (&extv, pattern,
1712                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1713                                        tmode))
1714     {
1715       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1716                                                  unsignedp, target, mode,
1717                                                  tmode);
1718       if (result)
1719         return result;
1720     }
1721
1722   /* If OP0 is a memory, try copying it to a register and seeing if a
1723      cheap register alternative is available.  */
1724   if (MEM_P (op0))
1725     {
1726       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1727                                         tmode))
1728         {
1729           rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1730                                                      bitnum, unsignedp,
1731                                                      target, mode,
1732                                                      tmode);
1733           if (result)
1734             return result;
1735         }
1736
1737       rtx_insn *last = get_last_insn ();
1738
1739       /* Try loading part of OP0 into a register and extracting the
1740          bitfield from that.  */
1741       unsigned HOST_WIDE_INT bitpos;
1742       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1743                                                0, 0, tmode, &bitpos);
1744       if (xop0)
1745         {
1746           xop0 = copy_to_reg (xop0);
1747           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1748                                             unsignedp, target,
1749                                             mode, tmode, false);
1750           if (result)
1751             return result;
1752           delete_insns_since (last);
1753         }
1754     }
1755
1756   if (!fallback_p)
1757     return NULL;
1758
1759   /* Find a correspondingly-sized integer field, so we can apply
1760      shifts and masks to it.  */
1761   int_mode = int_mode_for_mode (tmode);
1762   if (int_mode == BLKmode)
1763     int_mode = int_mode_for_mode (mode);
1764   /* Should probably push op0 out to memory and then do a load.  */
1765   gcc_assert (int_mode != BLKmode);
1766
1767   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1768                                     target, unsignedp);
1769   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1770 }
1771
1772 /* Emit code that reads the bit-field of BITSIZE bits located BITNUM bits
1773    into STR_RTX, and return the rtx that holds the extracted value.
1774    TARGET, when nonzero, is only a suggestion for where to put the result;
1775    callers must use the returned rtx regardless of TARGET.
1776
1777    STR_RTX is the object that contains the field (a REG or MEM).
1778    UNSIGNEDP is nonzero if the field is unsigned.
1779    MODE is the natural mode of the field value once extracted.
1780    TMODE is the mode the caller would like the result to have,
1781    although the result may come back in MODE instead.
1782
1783    When strict_volatile_bitfield_p accepts the field, a full-width field
1784    is returned as a plain reference in the access mode and a narrower one
1785    is first copied into a register; every other case goes straight to
1786    extract_bit_field_1 with its fallback enabled.  */
1787
1788 rtx
1789 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1790                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1791                    machine_mode mode, machine_mode tmode)
1792 {
1793   /* Pick the mode used for the -fstrict-volatile-bitfields test:
1794      STR_RTX's mode if it has a size, else TARGET's, else TMODE.  */
1795   machine_mode access_mode = tmode;
1796   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1797     access_mode = GET_MODE (str_rtx);
1798   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1799     access_mode = GET_MODE (target);
1800
1801   if (!strict_volatile_bitfield_p (str_rtx, bitsize, bitnum,
1802                                    access_mode, 0, 0))
1803     return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1804                                 target, mode, tmode, true);
1805
1806   if (bitsize != GET_MODE_BITSIZE (access_mode))
1807     {
1808       /* The field is narrower than ACCESS_MODE: narrow the reference,
1809          copy it into a register and extract the field from the copy.  */
1810       rtx narrowed = narrow_bit_field_mem (str_rtx, access_mode, bitsize,
1811                                            bitnum, &bitnum);
1812       gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (access_mode));
1813       return extract_bit_field_1 (copy_to_reg (narrowed), bitsize, bitnum,
1814                                   unsignedp, target, mode, tmode, true);
1815     }
1816
1817   /* Extraction of a full ACCESS_MODE value can be done with a simple load.
1818      We know here that the field can be accessed with one single
1819      instruction.  For targets that support unaligned memory,
1820      an unaligned access may be necessary.  */
1821   rtx whole = adjust_bitfield_address (str_rtx, access_mode,
1822                                        bitnum / BITS_PER_UNIT);
1823   gcc_assert (bitnum % BITS_PER_UNIT == 0);
1824   return convert_extracted_bit_field (whole, mode, tmode, unsignedp);
1825 }
1826 \f
1827 /* Extract the BITSIZE-bit field that starts at bit BITNUM of OP0 using
1828    shifts and boolean operations, and return an rtx for the value.
1829
1830    UNSIGNEDP is nonzero for an unsigned field (no sign extension).
1831    TARGET, if nonzero, is a suggested place for the result; the value
1832    may nevertheless be returned elsewhere.  TMODE is the mode wanted
1833    for the result.  A MEM is first narrowed to a suitable access mode.  */
1834
1835 static rtx
1836 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1837                          unsigned HOST_WIDE_INT bitsize,
1838                          unsigned HOST_WIDE_INT bitnum, rtx target,
1839                          int unsignedp)
1840 {
1841   if (!MEM_P (op0))
1842     return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1843                                       target, unsignedp);
1844
1845   machine_mode best_mode = get_best_mode (bitsize, bitnum, 0, 0,
1846                                           MEM_ALIGN (op0), word_mode,
1847                                           MEM_VOLATILE_P (op0));
1848   if (best_mode == VOIDmode)
1849     /* No single mode covers the field; this should only happen when
1850        the field spans word boundaries.  */
1851     return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1852
1853   rtx narrowed = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
1854                                        &bitnum);
1855   return extract_fixed_bit_field_1 (tmode, narrowed, bitsize, bitnum,
1856                                     target, unsignedp);
1857 }
1858
1859 /* Helper function for extract_fixed_bit_field: extract BITSIZE bits at
1860    bit BITNUM of OP0, taking the access mode from OP0 itself.  */
1861
1862 static rtx
1863 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1864                            unsigned HOST_WIDE_INT bitsize,
1865                            unsigned HOST_WIDE_INT bitnum, rtx target,
1866                            int unsignedp)
1867 {
1868   machine_mode mode = GET_MODE (op0);
1869   gcc_assert (SCALAR_INT_MODE_P (mode));
1870
1871   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1872      for invalid input, such as the extract equivalent of f5 from
1873      gcc.dg/pr48335-2.c.  */
1874
1875   if (BYTES_BIG_ENDIAN)
1876     /* BITNUM is the distance between our msb and that of OP0.
1877        Convert it to the distance from the lsb.  */
1878     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1879
1880   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1881      We have reduced the big-endian case to the little-endian case.  */
1882
1883   if (unsignedp)
1884     {
1885       if (bitnum)
1886         {
1887           /* If the field does not already start at the lsb,
1888              shift it (logically) so it does.  */
1889           /* Use TARGET for the shift only if it is a REG and MODE is TMODE.  */
1890           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1891           if (tmode != mode)
1892             subtarget = 0;
1893           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1894         }
1895       /* Convert the value to the desired mode (as an unsigned value).  */
1896       if (mode != tmode)
1897         op0 = convert_to_mode (tmode, op0, 1);
1898
1899       /* Unless the msb of the field used to be the msb when we shifted,
1900          mask out the upper bits, keeping only the low BITSIZE bits.  */
1901
1902       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1903         return expand_binop (GET_MODE (op0), and_optab, op0,
1904                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1905                              target, 1, OPTAB_LIB_WIDEN);
1906       return op0;
1907     }
1908
1909   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1910      then arithmetic-shift its lsb to the lsb of the word.  */
1911   op0 = force_reg (mode, op0);
1912
1913   /* Find the narrowest integer mode that contains the field, and convert
1914      OP0 to it; MODE is the mode of both shifts from here on.  */
1915   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1916        mode = GET_MODE_WIDER_MODE (mode))
1917     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1918       {
1919         op0 = convert_to_mode (mode, op0, 0);
1920         break;
1921       }
1922   /* Do not use TARGET unless the mode found above is TMODE.  */
1923   if (mode != tmode)
1924     target = 0;
1925
1926   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1927     {
1928       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1929       /* Maybe propagate the target for the left shift.  */
1930       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1931       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1932     }
1933
1934   return expand_shift (RSHIFT_EXPR, mode, op0,
1935                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1936 }
1937
1938 /* Return a constant rtx of mode MODE, built by immed_wide_int_const, whose
1939    value is VALUE << BITPOS.  */
1940
1941 static rtx
1942 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
1943               int bitpos)
1944 {
1945   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
1946 }
1947 \f
1948 /* Extract a bit field that is split across two words
1949    and return an RTX for the result, which is assembled in word_mode.
1950
1951    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1952    BITSIZE is the field width; BITPOS is the position of its first bit
1953    in that word.  UNSIGNEDP is 1 to zero-extend the field; else sign-extend.  */
1954
1955 static rtx
1956 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1957                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1958 {
1959   unsigned int unit;
1960   unsigned int bitsdone = 0;	/* Bits of the field extracted so far.  */
1961   rtx result = NULL_RTX;
1962   int first = 1;		/* Nonzero until RESULT holds a part.  */
1963
1964   /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1965      much at a time.  For a MEM, UNIT is further limited by its alignment.  */
1966   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1967     unit = BITS_PER_WORD;
1968   else
1969     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1970
1971   while (bitsdone < bitsize)
1972     {
1973       unsigned HOST_WIDE_INT thissize;
1974       rtx part, word;
1975       unsigned HOST_WIDE_INT thispos;
1976       unsigned HOST_WIDE_INT offset;
1977       /* Locate the next piece: OFFSET counts whole UNITs, THISPOS bits.  */
1978       offset = (bitpos + bitsdone) / unit;
1979       thispos = (bitpos + bitsdone) % unit;
1980
1981       /* THISSIZE must not overrun a word boundary.  Otherwise,
1982          extract_fixed_bit_field will call us again, and we will mutually
1983          recurse forever.  */
1984       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1985       thissize = MIN (thissize, unit - thispos);
1986
1987       /* If OP0 is a register, then handle OFFSET here.
1988
1989          When handling multiword bitfields, extract_bit_field may pass
1990          down a word_mode SUBREG of a larger REG for a bitfield that actually
1991          crosses a word boundary.  Thus, for a SUBREG, we must find
1992          the current word starting from the base register.  */
1993       if (GET_CODE (op0) == SUBREG)
1994         {
1995           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1996           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1997                                         GET_MODE (SUBREG_REG (op0)));
1998           offset = 0;
1999         }
2000       else if (REG_P (op0))
2001         {
2002           word = operand_subword_force (op0, offset, GET_MODE (op0));
2003           offset = 0;
2004         }
2005       else
2006         word = op0;
2007
2008       /* Extract the parts in bit-counting order,
2009          whose meaning is determined by BYTES_BIG_ENDIAN.
2010          OFFSET is in UNITs, and UNIT is in bits.  */
2011       part = extract_fixed_bit_field (word_mode, word, thissize,
2012                                       offset * unit + thispos, 0, 1);
2013       bitsdone += thissize;
2014
2015       /* Shift this part into place for the result.  */
2016       if (BYTES_BIG_ENDIAN)
2017         {
2018           if (bitsize != bitsdone)
2019             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2020                                  bitsize - bitsdone, 0, 1);
2021         }
2022       else
2023         {
2024           if (bitsdone != thissize)
2025             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2026                                  bitsdone - thissize, 0, 1);
2027         }
2028
2029       if (first)
2030         result = part;
2031       else
2032         /* Combine the parts with bitwise or.  This works
2033            because we extracted each part as an unsigned bit field.  */
2034         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2035                                OPTAB_LIB_WIDEN);
2036
2037       first = 0;
2038     }
2039
2040   /* Unsigned bit field: we are done.  */
2041   if (unsignedp)
2042     return result;
2043   /* Signed bit field: sign-extend by shifting left, then right.  */
2044   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2045                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2046   return expand_shift (RSHIFT_EXPR, word_mode, result,
2047                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2048 }
2049 \f
2050 /* Compute an rvalue of mode MODE holding the low-order bits of SRC,
2051    keeping the bit pattern intact.  SRC_MODE is the mode of SRC; when it
2052    is narrower than MODE the upper bits of the result are zero.  Return
2053    the value, or null if the layout of either mode is unknown (as for
2054    CC modes) or if the extraction would need unprofitable mode
2055    punning.
2056
2057    Differences from the gen_lowpart* family:
2058
2059      - the result is only ever usable as an rvalue
2060
2061      - if MODE is wider than SRC_MODE, the extraction performs a
2062        zero extension
2063
2064      - if MODE is narrower than SRC_MODE, the extraction performs a
2065        truncation (and so TRULY_NOOP_TRUNCATION applies).
2066
2067    Put differently, this routine performs a computation, while the
2068    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2069    operations.  */
2070
2071 rtx
2072 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2073 {
2074   if (mode == src_mode)
2075     return src;
2076
2077   if (CONSTANT_P (src))
2078     {
2079       /* Avoid simplify_gen_subreg: if simplify_subreg fails, it would
2080          create (subreg (symbol_ref)) or a similar invalid SUBREG.  */
2081       unsigned int lowpart_byte = subreg_lowpart_offset (mode, src_mode);
2082       rtx folded = simplify_subreg (mode, src, src_mode, lowpart_byte);
2083       if (folded)
2084         return folded;
2085
2086       /* Fall back to a SUBREG of a register copy, provided the constant
2087          has a mode and the SUBREG would be valid.  */
2088       if (GET_MODE (src) != VOIDmode
2089           && validate_subreg (mode, src_mode, src, lowpart_byte))
2090         {
2091           rtx src_reg = force_reg (GET_MODE (src), src);
2092           return gen_rtx_SUBREG (mode, src_reg, lowpart_byte);
2093         }
2094       return NULL_RTX;
2095     }
2096
2097   /* The layout of CC modes is unknown.  */
2098   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2099     return NULL_RTX;
2100
2101   /* Same-sized tieable modes: try a plain lowpart first.  */
2102   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2103       && MODES_TIEABLE_P (mode, src_mode))
2104     {
2105       rtx punned = gen_lowpart_common (mode, src);
2106       if (punned)
2107         return punned;
2108     }
2109
2110   /* Otherwise pun through integer modes, giving up if either integer
2111      mode is missing or is not tieable with its original mode.  */
2112   machine_mode src_int_mode = int_mode_for_mode (src_mode);
2113   machine_mode int_mode = int_mode_for_mode (mode);
2114   if (src_int_mode == BLKmode || int_mode == BLKmode
2115       || !MODES_TIEABLE_P (src_int_mode, src_mode)
2116       || !MODES_TIEABLE_P (int_mode, mode))
2117     return NULL_RTX;
2118
2119   rtx as_int = gen_lowpart (src_int_mode, src);
2120   as_int = convert_modes (int_mode, src_int_mode, as_int, true);
2121   return gen_lowpart (mode, as_int);
2122 }
2123 \f
2124 /* Emit code for TARGET += INC, copying the sum back if needed.  */
2125
2126 void
2127 expand_inc (rtx target, rtx inc)
2128 {
2129   machine_mode target_mode = GET_MODE (target);
2130   rtx sum = expand_binop (target_mode, add_optab, target, inc,
2131                           target, 0, OPTAB_LIB_WIDEN);
2132   if (sum != target)
2133     emit_move_insn (target, sum);
2134 }
2135
2136 /* Emit code for TARGET -= DEC, copying the difference back if needed.  */
2137
2138 void
2139 expand_dec (rtx target, rtx dec)
2140 {
2141   machine_mode target_mode = GET_MODE (target);
2142   rtx diff = expand_binop (target_mode, sub_optab, target, dec,
2143                            target, 0, OPTAB_LIB_WIDEN);
2144   if (diff != target)
2145     emit_move_insn (target, diff);
2146 }
2147 \f
2148 /* Output a shift instruction for expression code CODE,
2149    with SHIFTED being the rtx for the value to shift,
2150    and AMOUNT the rtx for the amount to shift by.
2151    Store the result in the rtx TARGET, if that is convenient.
2152    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2153    Return the rtx for where the value is.  */
2154
2155 static rtx
2156 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2157                 rtx amount, rtx target, int unsignedp)
2158 {
2159   rtx op1, temp = 0;
2160   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2161   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2162   optab lshift_optab = ashl_optab;
2163   optab rshift_arith_optab = ashr_optab;
2164   optab rshift_uns_optab = lshr_optab;
2165   optab lrotate_optab = rotl_optab;
2166   optab rrotate_optab = rotr_optab;
2167   machine_mode op1_mode;
2168   machine_mode scalar_mode = mode;
2169   int attempt;
2170   bool speed = optimize_insn_for_speed_p ();
2171
2172   if (VECTOR_MODE_P (mode))
2173     scalar_mode = GET_MODE_INNER (mode);
2174   op1 = amount;
2175   op1_mode = GET_MODE (op1);
2176
2177   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2178      shift amount is a vector, use the vector/vector shift patterns.  */
2179   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2180     {
2181       lshift_optab = vashl_optab;
2182       rshift_arith_optab = vashr_optab;
2183       rshift_uns_optab = vlshr_optab;
2184       lrotate_optab = vrotl_optab;
2185       rrotate_optab = vrotr_optab;
2186     }
2187
2188   /* Previously detected shift-counts computed by NEGATE_EXPR
2189      and shifted in the other direction; but that does not work
2190      on all machines.  */
2191
2192   if (SHIFT_COUNT_TRUNCATED)
2193     {
2194       if (CONST_INT_P (op1)
2195           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2196               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2197         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2198                        % GET_MODE_BITSIZE (scalar_mode));
2199       else if (GET_CODE (op1) == SUBREG
2200                && subreg_lowpart_p (op1)
2201                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2202                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2203         op1 = SUBREG_REG (op1);
2204     }
2205
2206   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2207      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2208      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2209      amount instead.  */
2210   if (rotate
2211       && CONST_INT_P (op1)
2212       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2213                    GET_MODE_BITSIZE (scalar_mode) - 1))
2214     {
2215       op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2216       left = !left;
2217       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2218     }
2219
2220   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2221      Note that this is not the case for bigger values.  For instance a rotation
2222      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2223      0x04030201 (bswapsi).  */
2224   if (rotate
2225       && CONST_INT_P (op1)
2226       && INTVAL (op1) == BITS_PER_UNIT
2227       && GET_MODE_SIZE (scalar_mode) == 2
2228       && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2229     return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2230                                   unsignedp);
2231
2232   if (op1 == const0_rtx)
2233     return shifted;
2234
2235   /* Check whether its cheaper to implement a left shift by a constant
2236      bit count by a sequence of additions.  */
2237   if (code == LSHIFT_EXPR
2238       && CONST_INT_P (op1)
2239       && INTVAL (op1) > 0
2240       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2241       && INTVAL (op1) < MAX_BITS_PER_WORD
2242       && (shift_cost (speed, mode, INTVAL (op1))
2243           > INTVAL (op1) * add_cost (speed, mode))
2244       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2245     {
2246       int i;
2247       for (i = 0; i < INTVAL (op1); i++)
2248         {
2249           temp = force_reg (mode, shifted);
2250           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2251                                   unsignedp, OPTAB_LIB_WIDEN);
2252         }
2253       return shifted;
2254     }
2255
2256   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2257     {
2258       enum optab_methods methods;
2259
2260       if (attempt == 0)
2261         methods = OPTAB_DIRECT;
2262       else if (attempt == 1)
2263         methods = OPTAB_WIDEN;
2264       else
2265         methods = OPTAB_LIB_WIDEN;
2266
2267       if (rotate)
2268         {
2269           /* Widening does not work for rotation.  */
2270           if (methods == OPTAB_WIDEN)
2271             continue;
2272           else if (methods == OPTAB_LIB_WIDEN)
2273             {
2274               /* If we have been unable to open-code this by a rotation,
2275                  do it as the IOR of two shifts.  I.e., to rotate A
2276                  by N bits, compute
2277                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2278                  where C is the bitsize of A.
2279
2280                  It is theoretically possible that the target machine might
2281                  not be able to perform either shift and hence we would
2282                  be making two libcalls rather than just the one for the
2283                  shift (similarly if IOR could not be done).  We will allow
2284                  this extremely unlikely lossage to avoid complicating the
2285                  code below.  */
2286
2287               rtx subtarget = target == shifted ? 0 : target;
2288               rtx new_amount, other_amount;
2289               rtx temp1;
2290
2291               new_amount = op1;
2292               if (op1 == const0_rtx)
2293                 return shifted;
2294               else if (CONST_INT_P (op1))
2295                 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2296                                         - INTVAL (op1));
2297               else
2298                 {
2299                   other_amount
2300                     = simplify_gen_unary (NEG, GET_MODE (op1),
2301                                           op1, GET_MODE (op1));
2302                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2303                   other_amount
2304                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2305                                            gen_int_mode (mask, GET_MODE (op1)));
2306                 }
2307
2308               shifted = force_reg (mode, shifted);
2309
2310               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2311                                      mode, shifted, new_amount, 0, 1);
2312               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2313                                       mode, shifted, other_amount,
2314                                       subtarget, 1);
2315               return expand_binop (mode, ior_optab, temp, temp1, target,
2316                                    unsignedp, methods);
2317             }
2318
2319           temp = expand_binop (mode,
2320                                left ? lrotate_optab : rrotate_optab,
2321                                shifted, op1, target, unsignedp, methods);
2322         }
2323       else if (unsignedp)
2324         temp = expand_binop (mode,
2325                              left ? lshift_optab : rshift_uns_optab,
2326                              shifted, op1, target, unsignedp, methods);
2327
2328       /* Do arithmetic shifts.
2329          Also, if we are going to widen the operand, we can just as well
2330          use an arithmetic right-shift instead of a logical one.  */
2331       if (temp == 0 && ! rotate
2332           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2333         {
2334           enum optab_methods methods1 = methods;
2335
2336           /* If trying to widen a log shift to an arithmetic shift,
2337              don't accept an arithmetic shift of the same size.  */
2338           if (unsignedp)
2339             methods1 = OPTAB_MUST_WIDEN;
2340
2341           /* Arithmetic shift */
2342
2343           temp = expand_binop (mode,
2344                                left ? lshift_optab : rshift_arith_optab,
2345                                shifted, op1, target, unsignedp, methods1);
2346         }
2347
2348       /* We used to try extzv here for logical right shifts, but that was
2349          only useful for one machine, the VAX, and caused poor code
2350          generation there for lshrdi3, so the code was deleted and a
2351          define_expand for lshrsi3 was added to vax.md.  */
2352     }
2353
2354   gcc_assert (temp);
2355   return temp;
2356 }
2357
2358 /* Emit a shift of SHIFTED (an rtx in mode MODE) by the constant number
2359    of bits AMOUNT, for expression code CODE.  This is a convenience
2360    wrapper that turns AMOUNT into a constant rtx and defers to
2361    expand_shift_1.  TARGET, if nonzero, suggests where to put the result.
2362    A nonzero UNSIGNEDP requests a logical shift; zero requests an
2363    arithmetic one.  The return value is the rtx holding the result.  */
2364
2365 rtx
2366 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2367               int amount, rtx target, int unsignedp)
2368 {
2369   rtx shift_count = GEN_INT (amount);
2370   return expand_shift_1 (code, mode, shifted, shift_count, target, unsignedp);
2371 }
2372
2373 /* Like expand_shift, but the shift count is given as the tree AMOUNT
2374    rather than as a host integer.  AMOUNT is first expanded to an rtx
2375    with expand_normal and the shift itself is then emitted by
2376    expand_shift_1 for expression code CODE on SHIFTED in mode MODE.
2377    TARGET, if nonzero, suggests where to put the result; a nonzero
2378    UNSIGNEDP requests a logical shift.  Returns the rtx of the result.  */
2379
2380 rtx
2381 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2382                        tree amount, rtx target, int unsignedp)
2383 {
2384   rtx shift_count = expand_normal (amount);
2385   return expand_shift_1 (code, mode, shifted, shift_count, target, unsignedp);
2386 }
2387
2388 \f
2389 /* Indicates the type of fixup needed after a constant multiplication.
2390    basic_variant: no fixup.  negate_variant: the sequence multiplied by
2391    -VAL, so negate the result.  add_variant: the sequence multiplied by
2392    VAL - 1, so add the multiplicand to the result.  */
2393 enum mult_variant {basic_variant, negate_variant, add_variant};
2394 /* Forward declarations of file-local (static) helpers.  */
2395 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2396                         const struct mult_cost *, machine_mode mode);
2397 static bool choose_mult_variant (machine_mode, HOST_WIDE_INT,
2398                                  struct algorithm *, enum mult_variant *, int);
2399 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2400                               const struct algorithm *, enum mult_variant);
2401 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2402 static rtx extract_high_half (machine_mode, rtx);
2403 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2404 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2405                                        int, int);
2406 /* Compute the best algorithm for multiplying by T and store it in
2407    *ALG_OUT.  The algorithm must cost less than *COST_LIMIT.  If the
2408    cost left in ALG_OUT->cost is not below *COST_LIMIT, no algorithm
2409    was found and all other fields of *ALG_OUT are undefined.
2410    MODE is the machine mode of the multiplication.  */
2411
2412 static void
2413 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2414             const struct mult_cost *cost_limit, machine_mode mode)
2415 {
2416   int m;
2417   struct algorithm *alg_in, *best_alg;
2418   struct mult_cost best_cost;
2419   struct mult_cost new_limit;
2420   int op_cost, op_latency;
2421   unsigned HOST_WIDE_INT orig_t = t;
2422   unsigned HOST_WIDE_INT q;
2423   int maxm, hash_index;
2424   bool cache_hit = false;
2425   enum alg_code cache_alg = alg_zero;
2426   bool speed = optimize_insn_for_speed_p ();
2427   machine_mode imode;
2428   struct alg_hash_entry *entry_ptr;
2429
2430   /* Indicate that no algorithm is yet found.  If no algorithm
2431      is found, this value will be returned and indicate failure.  */
2432   alg_out->cost.cost = cost_limit->cost + 1;
2433   alg_out->cost.latency = cost_limit->latency + 1;
2434
2435   if (cost_limit->cost < 0
2436       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2437     return;
2438
2439   /* Be prepared for vector modes.  */
2440   imode = GET_MODE_INNER (mode);
2441   if (imode == VOIDmode)
2442     imode = mode;
2443
2444   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2445
2446   /* Restrict the bits of "t" to the multiplication's mode.  */
2447   t &= GET_MODE_MASK (imode);
2448
2449   /* t == 1 can be done in zero cost.  */
2450   if (t == 1)
2451     {
2452       alg_out->ops = 1;
2453       alg_out->cost.cost = 0;
2454       alg_out->cost.latency = 0;
2455       alg_out->op[0] = alg_m;
2456       return;
2457     }
2458
2459   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2460      fail now.  */
2461   if (t == 0)
2462     {
2463       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2464         return;
2465       else
2466         {
2467           alg_out->ops = 1;
2468           alg_out->cost.cost = zero_cost (speed);
2469           alg_out->cost.latency = zero_cost (speed);
2470           alg_out->op[0] = alg_zero;
2471           return;
2472         }
2473     }
2474
2475   /* We'll be needing a couple extra algorithm structures now.  */
2476
2477   alg_in = XALLOCA (struct algorithm);
2478   best_alg = XALLOCA (struct algorithm);
2479   best_cost = *cost_limit;
2480
2481   /* Compute the hash index.  */
2482   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2483
2484   /* See if we already know what to do for T.  A slot is only usable if
2485      it was recorded for this same multiplier, mode and speed setting.  */
2486   entry_ptr = alg_hash_entry_ptr (hash_index);
2487   if (entry_ptr->t == t
2488       && entry_ptr->mode == mode
2489       && entry_ptr->speed == speed
2490       && entry_ptr->alg != alg_unknown)
2491     {
2492       cache_alg = entry_ptr->alg;
2493
2494       if (cache_alg == alg_impossible)
2495         {
2496           /* The cache tells us that it's impossible to synthesize
2497              multiplication by T within entry_ptr->cost.  */
2498           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2499             /* COST_LIMIT is at least as restrictive as the one
2500                recorded in the hash table, in which case we have no
2501                hope of synthesizing a multiplication.  Just
2502                return.  */
2503             return;
2504
2505           /* If we get here, COST_LIMIT is less restrictive than the
2506              one recorded in the hash table, so we may be able to
2507              synthesize a multiplication.  Proceed as if we didn't
2508              have the cache entry.  */
2509         }
2510       else
2511         {
2512           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2513             /* The cached algorithm shows that this multiplication
2514                requires more cost than COST_LIMIT.  Just return.  This
2515                way, we don't clobber this cache entry with
2516                alg_impossible but retain useful information.  */
2517             return;
2518           /* Otherwise re-derive only the step kind the cache recorded.  */
2519           cache_hit = true;
2520
2521           switch (cache_alg)
2522             {
2523             case alg_shift:
2524               goto do_alg_shift;
2525
2526             case alg_add_t_m2:
2527             case alg_sub_t_m2:
2528               goto do_alg_addsub_t_m2;
2529
2530             case alg_add_factor:
2531             case alg_sub_factor:
2532               goto do_alg_addsub_factor;
2533
2534             case alg_add_t2_m:
2535               goto do_alg_add_t2_m;
2536
2537             case alg_sub_t2_m:
2538               goto do_alg_sub_t2_m;
2539
2540             default:
2541               gcc_unreachable ();
2542             }
2543         }
2544     }
2545
2546   /* If we have a group of zero bits at the low-order part of T, try
2547      multiplying by the remaining bits and then doing a shift.  */
2548
2549   if ((t & 1) == 0)
2550     {
2551     do_alg_shift:
2552       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2553       if (m < maxm)
2554         {
2555           q = t >> m;
2556           /* The function expand_shift will choose between a shift and
2557              a sequence of additions, so the observed cost is given as
2558              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2559           op_cost = m * add_cost (speed, mode);
2560           if (shift_cost (speed, mode, m) < op_cost)
2561             op_cost = shift_cost (speed, mode, m);
2562           new_limit.cost = best_cost.cost - op_cost;
2563           new_limit.latency = best_cost.latency - op_cost;
2564           synth_mult (alg_in, q, &new_limit, mode);
2565
2566           alg_in->cost.cost += op_cost;
2567           alg_in->cost.latency += op_cost;
2568           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2569             {
2570               best_cost = alg_in->cost;
2571               std::swap (alg_in, best_alg);
2572               best_alg->log[best_alg->ops] = m;
2573               best_alg->op[best_alg->ops] = alg_shift;
2574             }
2575
2576           /* See if treating ORIG_T as a signed number yields a better
2577              sequence.  Try this sequence only for a negative ORIG_T
2578              as it would be useless for a non-negative ORIG_T.  */
2579           if ((HOST_WIDE_INT) orig_t < 0)
2580             {
2581               /* Shift ORIG_T as follows because a right shift of a
2582                  negative-valued signed type is implementation
2583                  defined.  */
2584               q = ~(~orig_t >> m);
2585               /* The function expand_shift will choose between a shift
2586                  and a sequence of additions, so the observed cost is
2587                  given as MIN (m * add_cost(speed, mode),
2588                  shift_cost(speed, mode, m)).  */
2589               op_cost = m * add_cost (speed, mode);
2590               if (shift_cost (speed, mode, m) < op_cost)
2591                 op_cost = shift_cost (speed, mode, m);
2592               new_limit.cost = best_cost.cost - op_cost;
2593               new_limit.latency = best_cost.latency - op_cost;
2594               synth_mult (alg_in, q, &new_limit, mode);
2595
2596               alg_in->cost.cost += op_cost;
2597               alg_in->cost.latency += op_cost;
2598               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2599                 {
2600                   best_cost = alg_in->cost;
2601                   std::swap (alg_in, best_alg);
2602                   best_alg->log[best_alg->ops] = m;
2603                   best_alg->op[best_alg->ops] = alg_shift;
2604                 }
2605             }
2606         }
2607       if (cache_hit)
2608         goto done;
2609     }
2610
2611   /* If we have an odd number, add or subtract one.  */
2612   if ((t & 1) != 0)
2613     {
2614       unsigned HOST_WIDE_INT w;
2615
2616     do_alg_addsub_t_m2:
2617       for (w = 1; (w & t) != 0; w <<= 1)
2618         ;
2619       /* If T was -1, then W will be zero after the loop.  This is another
2620          case where T ends with ...111.  Handling this with (T + 1) and
2621          subtract 1 produces slightly better code and results in algorithm
2622          selection much faster than treating it like the ...0111 case
2623          below.  */
2624       if (w == 0
2625           || (w > 2
2626               /* Reject the case where t is 3.
2627                  Thus we prefer addition in that case.  */
2628               && t != 3))
2629         {
2630           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2631
2632           op_cost = add_cost (speed, mode);
2633           new_limit.cost = best_cost.cost - op_cost;
2634           new_limit.latency = best_cost.latency - op_cost;
2635           synth_mult (alg_in, t + 1, &new_limit, mode);
2636
2637           alg_in->cost.cost += op_cost;
2638           alg_in->cost.latency += op_cost;
2639           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2640             {
2641               best_cost = alg_in->cost;
2642               std::swap (alg_in, best_alg);
2643               best_alg->log[best_alg->ops] = 0;
2644               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2645             }
2646         }
2647       else
2648         {
2649           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2650
2651           op_cost = add_cost (speed, mode);
2652           new_limit.cost = best_cost.cost - op_cost;
2653           new_limit.latency = best_cost.latency - op_cost;
2654           synth_mult (alg_in, t - 1, &new_limit, mode);
2655
2656           alg_in->cost.cost += op_cost;
2657           alg_in->cost.latency += op_cost;
2658           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2659             {
2660               best_cost = alg_in->cost;
2661               std::swap (alg_in, best_alg);
2662               best_alg->log[best_alg->ops] = 0;
2663               best_alg->op[best_alg->ops] = alg_add_t_m2;
2664             }
2665         }
2666
2667       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2668          quickly with a - a * n for some appropriate constant n.  */
2669       m = exact_log2 (-orig_t + 1);
2670       if (m >= 0 && m < maxm)
2671         {
2672           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2673           /* If the target has a cheap shift-and-subtract insn use
2674              that in preference to a shift insn followed by a sub insn.
2675              Assume that the shift-and-sub is "atomic" with a latency
2676              equal to its cost, otherwise assume that on superscalar
2677              hardware the shift may be executed concurrently with the
2678              earlier steps in the algorithm.  */
2679           if (shiftsub1_cost (speed, mode, m) <= op_cost)
2680             {
2681               op_cost = shiftsub1_cost (speed, mode, m);
2682               op_latency = op_cost;
2683             }
2684           else
2685             op_latency = add_cost (speed, mode);
2686
2687           new_limit.cost = best_cost.cost - op_cost;
2688           new_limit.latency = best_cost.latency - op_latency;
2689           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2690                       &new_limit, mode);
2691
2692           alg_in->cost.cost += op_cost;
2693           alg_in->cost.latency += op_latency;
2694           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2695             {
2696               best_cost = alg_in->cost;
2697               std::swap (alg_in, best_alg);
2698               best_alg->log[best_alg->ops] = m;
2699               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2700             }
2701         }
2702
2703       if (cache_hit)
2704         goto done;
2705     }
2706
2707   /* Look for factors of t of the form
2708      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2709      If we find such a factor, we can multiply by t using an algorithm that
2710      multiplies by q, shift the result by m and add/subtract it to itself.
2711
2712      We search for large factors first and loop down, even if large factors
2713      are less probable than small; if we find a large factor we will find a
2714      good sequence quickly, and therefore be able to prune (by decreasing
2715      COST_LIMIT) the search.  */
2716
2717  do_alg_addsub_factor:
2718   for (m = floor_log2 (t - 1); m >= 2; m--)
2719     {
2720       unsigned HOST_WIDE_INT d;
2721
2722       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2723       if (t % d == 0 && t > d && m < maxm
2724           && (!cache_hit || cache_alg == alg_add_factor))
2725         {
2726           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2727           if (shiftadd_cost (speed, mode, m) <= op_cost)
2728             op_cost = shiftadd_cost (speed, mode, m);
2729
2730           op_latency = op_cost;
2731
2732
2733           new_limit.cost = best_cost.cost - op_cost;
2734           new_limit.latency = best_cost.latency - op_latency;
2735           synth_mult (alg_in, t / d, &new_limit, mode);
2736
2737           alg_in->cost.cost += op_cost;
2738           alg_in->cost.latency += op_latency;
2739           if (alg_in->cost.latency < op_cost)
2740             alg_in->cost.latency = op_cost;
2741           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2742             {
2743               best_cost = alg_in->cost;
2744               std::swap (alg_in, best_alg);
2745               best_alg->log[best_alg->ops] = m;
2746               best_alg->op[best_alg->ops] = alg_add_factor;
2747             }
2748           /* Other factors will have been taken care of in the recursion.  */
2749           break;
2750         }
2751
2752       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2753       if (t % d == 0 && t > d && m < maxm
2754           && (!cache_hit || cache_alg == alg_sub_factor))
2755         {
2756           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2757           if (shiftsub0_cost (speed, mode, m) <= op_cost)
2758             op_cost = shiftsub0_cost (speed, mode, m);
2759
2760           op_latency = op_cost;
2761
2762           new_limit.cost = best_cost.cost - op_cost;
2763           new_limit.latency = best_cost.latency - op_latency;
2764           synth_mult (alg_in, t / d, &new_limit, mode);
2765
2766           alg_in->cost.cost += op_cost;
2767           alg_in->cost.latency += op_latency;
2768           if (alg_in->cost.latency < op_cost)
2769             alg_in->cost.latency = op_cost;
2770           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2771             {
2772               best_cost = alg_in->cost;
2773               std::swap (alg_in, best_alg);
2774               best_alg->log[best_alg->ops] = m;
2775               best_alg->op[best_alg->ops] = alg_sub_factor;
2776             }
2777           break;
2778         }
2779     }
2780   if (cache_hit)
2781     goto done;
2782
2783   /* Try shift-and-add (load effective address) instructions,
2784      i.e. do a*3, a*5, a*9.  */
2785   if ((t & 1) != 0)
2786     {
2787     do_alg_add_t2_m:
2788       q = t - 1;
2789       q = q & -q;
2790       m = exact_log2 (q);
2791       if (m >= 0 && m < maxm)
2792         {
2793           op_cost = shiftadd_cost (speed, mode, m);
2794           new_limit.cost = best_cost.cost - op_cost;
2795           new_limit.latency = best_cost.latency - op_cost;
2796           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2797
2798           alg_in->cost.cost += op_cost;
2799           alg_in->cost.latency += op_cost;
2800           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2801             {
2802               best_cost = alg_in->cost;
2803               std::swap (alg_in, best_alg);
2804               best_alg->log[best_alg->ops] = m;
2805               best_alg->op[best_alg->ops] = alg_add_t2_m;
2806             }
2807         }
2808       if (cache_hit)
2809         goto done;
2810
2811     do_alg_sub_t2_m:
2812       q = t + 1;
2813       q = q & -q;
2814       m = exact_log2 (q);
2815       if (m >= 0 && m < maxm)
2816         {
2817           op_cost = shiftsub0_cost (speed, mode, m);
2818           new_limit.cost = best_cost.cost - op_cost;
2819           new_limit.latency = best_cost.latency - op_cost;
2820           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2821
2822           alg_in->cost.cost += op_cost;
2823           alg_in->cost.latency += op_cost;
2824           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2825             {
2826               best_cost = alg_in->cost;
2827               std::swap (alg_in, best_alg);
2828               best_alg->log[best_alg->ops] = m;
2829               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2830             }
2831         }
2832       if (cache_hit)
2833         goto done;
2834     }
2835
2836  done:
2837   /* If best_cost has not decreased, we have not found any algorithm.  */
2838   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2839     {
2840       /* We failed to find an algorithm.  Record alg_impossible for
2841          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2842          we are asked to find an algorithm for T within the same or
2843          lower COST_LIMIT, we can immediately return to the
2844          caller.  */
2845       entry_ptr->t = t;
2846       entry_ptr->mode = mode;
2847       entry_ptr->speed = speed;
2848       entry_ptr->alg = alg_impossible;
2849       entry_ptr->cost = *cost_limit;
2850       return;
2851     }
2852
2853   /* Cache the result.  */
2854   if (!cache_hit)
2855     {
2856       entry_ptr->t = t;
2857       entry_ptr->mode = mode;
2858       entry_ptr->speed = speed;
2859       entry_ptr->alg = best_alg->op[best_alg->ops];
2860       entry_ptr->cost.cost = best_cost.cost;
2861       entry_ptr->cost.latency = best_cost.latency;
2862     }
2863
2864   /* If we are getting a too long sequence for `struct algorithm'
2865      to record, make this search fail.  */
2866   if (best_alg->ops == MAX_BITS_PER_WORD)
2867     return;
2868
2869   /* Copy the algorithm from temporary space to the space at alg_out.
2870      We avoid using structure assignment because the majority of
2871      best_alg is normally undefined, and this is a critical function.  */
2872   alg_out->ops = best_alg->ops + 1;
2873   alg_out->cost = best_cost;
2874   memcpy (alg_out->op, best_alg->op,
2875           alg_out->ops * sizeof *alg_out->op);
2876   memcpy (alg_out->log, best_alg->log,
2877           alg_out->ops * sizeof *alg_out->log);
2878 }
2879 \f
2880 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2881    Try three variations:
2882
2883        - a shift/add sequence based on VAL itself
2884        - a shift/add sequence based on -VAL, followed by a negation
2885        - a shift/add sequence based on VAL - 1, followed by an addition.
2886    The -VAL variation is only tried for units no wider than a host int.
2887    Return true if the cheapest of these cost less than MULT_COST,
2888    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2889
2890 static bool
2891 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2892                      struct algorithm *alg, enum mult_variant *variant,
2893                      int mult_cost)
2894 {
2895   struct algorithm alg2;
2896   struct mult_cost limit;
2897   int op_cost;
2898   bool speed = optimize_insn_for_speed_p ();
2899
2900   /* Fail quickly for impossible bounds.  */
2901   if (mult_cost < 0)
2902     return false;
2903
2904   /* Ensure that mult_cost provides a reasonable upper bound.
2905      Any constant multiplication can be performed with less
2906      than 2 * bits additions.  */
2907   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2908   if (mult_cost > op_cost)
2909     mult_cost = op_cost;
2910
2911   *variant = basic_variant;
2912   limit.cost = mult_cost;
2913   limit.latency = mult_cost;
2914   synth_mult (alg, val, &limit, mode);
2915
2916   /* This works only if the inverted value actually fits in an
2917      `unsigned int'.  Negate in the unsigned type to avoid overflow.  */
2918   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2919     {
2920       op_cost = neg_cost (speed, mode);
2921       if (MULT_COST_LESS (&alg->cost, mult_cost))
2922         {
2923           limit.cost = alg->cost.cost - op_cost;
2924           limit.latency = alg->cost.latency - op_cost;
2925         }
2926       else
2927         {
2928           limit.cost = mult_cost - op_cost;
2929           limit.latency = mult_cost - op_cost;
2930         }
2931
2932       synth_mult (&alg2, -(unsigned HOST_WIDE_INT) val, &limit, mode);
2933       alg2.cost.cost += op_cost;
2934       alg2.cost.latency += op_cost;
2935       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2936         *alg = alg2, *variant = negate_variant;
2937     }
2938
2939   /* This proves very useful for division-by-constant.  */
2940   op_cost = add_cost (speed, mode);
2941   if (MULT_COST_LESS (&alg->cost, mult_cost))
2942     {
2943       limit.cost = alg->cost.cost - op_cost;
2944       limit.latency = alg->cost.latency - op_cost;
2945     }
2946   else
2947     {
2948       limit.cost = mult_cost - op_cost;
2949       limit.latency = mult_cost - op_cost;
2950     }
2951   /* Subtract in the unsigned type so VAL - 1 cannot overflow.  */
2952   synth_mult (&alg2, (unsigned HOST_WIDE_INT) val - 1, &limit, mode);
2953   alg2.cost.cost += op_cost;
2954   alg2.cost.latency += op_cost;
2955   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2956     *alg = alg2, *variant = add_variant;
2957
2958   return MULT_COST_LESS (&alg->cost, mult_cost);
2959 }
2960
2961 /* A subroutine of expand_mult, used for constant multiplications.
2962    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2963    convenient.  Use the shift/add sequence described by ALG and apply the
2964    final fixup specified by VARIANT.  Return the rtx holding the product.  */
2965
2966 static rtx
2967 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
2968                    rtx target, const struct algorithm *alg,
2969                    enum mult_variant variant)
2970 {
2971   unsigned HOST_WIDE_INT val_so_far;
2972   rtx_insn *insn;
2973   rtx accum, tem;
2974   int opno;
2975   machine_mode nmode;
2976   /* VAL_SO_FAR is unsigned so that shifts and wraparound are defined.  */
2977   /* Avoid referencing memory over and over and invalid sharing
2978      on SUBREGs.  */
2979   op0 = force_reg (mode, op0);
2980
2981   /* ACCUM starts out either as OP0 or as a zero, depending on
2982      the first operation.  */
2983
2984   if (alg->op[0] == alg_zero)
2985     {
2986       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2987       val_so_far = 0;
2988     }
2989   else if (alg->op[0] == alg_m)
2990     {
2991       accum = copy_to_mode_reg (mode, op0);
2992       val_so_far = 1;
2993     }
2994   else
2995     gcc_unreachable ();
2996
2997   for (opno = 1; opno < alg->ops; opno++)
2998     {
2999       int log = alg->log[opno];
3000       rtx shift_subtarget = optimize ? 0 : accum;
3001       rtx add_target
3002         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3003            && !optimize)
3004           ? target : 0;
3005       rtx accum_target = optimize ? 0 : accum;
3006       rtx accum_inner;
3007
3008       switch (alg->op[opno])
3009         {
3010         case alg_shift:
3011           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3012           /* REG_EQUAL note will be attached to the following insn.  */
3013           emit_move_insn (accum, tem);
3014           val_so_far <<= log;
3015           break;
3016
3017         case alg_add_t_m2:
3018           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3019           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3020                                  add_target ? add_target : accum_target);
3021           val_so_far += (unsigned HOST_WIDE_INT) 1 << log;
3022           break;
3023
3024         case alg_sub_t_m2:
3025           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3026           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3027                                  add_target ? add_target : accum_target);
3028           val_so_far -= (unsigned HOST_WIDE_INT) 1 << log;
3029           break;
3030
3031         case alg_add_t2_m:
3032           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3033                                 log, shift_subtarget, 0);
3034           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3035                                  add_target ? add_target : accum_target);
3036           val_so_far = (val_so_far << log) + 1;
3037           break;
3038
3039         case alg_sub_t2_m:
3040           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3041                                 log, shift_subtarget, 0);
3042           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3043                                  add_target ? add_target : accum_target);
3044           val_so_far = (val_so_far << log) - 1;
3045           break;
3046
3047         case alg_add_factor:
3048           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3049           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3050                                  add_target ? add_target : accum_target);
3051           val_so_far += val_so_far << log;
3052           break;
3053
3054         case alg_sub_factor:
3055           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3056           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3057                                  (add_target
3058                                   ? add_target : (optimize ? 0 : tem)));
3059           val_so_far = (val_so_far << log) - val_so_far;
3060           break;
3061
3062         default:
3063           gcc_unreachable ();
3064         }
3065
3066       if (SCALAR_INT_MODE_P (mode))
3067         {
3068           /* Write a REG_EQUAL note on the last insn so that we can cse
3069              multiplication sequences.  Note that if ACCUM is a SUBREG,
3070              we've set the inner register and must properly indicate that.  */
3071           tem = op0, nmode = mode;
3072           accum_inner = accum;
3073           if (GET_CODE (accum) == SUBREG)
3074             {
3075               accum_inner = SUBREG_REG (accum);
3076               nmode = GET_MODE (accum_inner);
3077               tem = gen_lowpart (nmode, op0);
3078             }
3079
3080           insn = get_last_insn ();
3081           set_dst_reg_note (insn, REG_EQUAL,
3082                             gen_rtx_MULT (nmode, tem,
3083                                           gen_int_mode (val_so_far, nmode)),
3084                             accum_inner);
3085         }
3086     }
3087
3088   if (variant == negate_variant)
3089     {
3090       val_so_far = -val_so_far;
3091       accum = expand_unop (mode, neg_optab, accum, target, 0);
3092     }
3093   else if (variant == add_variant)
3094     {
3095       val_so_far = val_so_far + 1;
3096       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3097     }
3098
3099   /* Compare only the bits of val and val_so_far that are significant
3100      in the result mode, to avoid sign-/zero-extension confusion.  */
3101   nmode = GET_MODE_INNER (mode);
3102   if (nmode == VOIDmode)
3103     nmode = mode;
3104   val &= GET_MODE_MASK (nmode);
3105   val_so_far &= GET_MODE_MASK (nmode);
3106   gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3107
3108   return accum;
3109 }
3110
3111 /* Perform a multiplication and return an rtx for the result.
3112    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3113    TARGET is a suggestion for where to store the result (an rtx).
        UNSIGNEDP is passed on to the shift and binop expanders; when it is
        zero, MODE is a scalar integer mode and flag_trapv is set, the
        trapping optabs (negv_optab, smulv_optab) are used and no shift/add
        sequence is synthesized.
3114
3115    We check specially for a constant integer as OP1.
3116    If OP0 satisfies CONSTANT_P the two operands are swapped on entry,
3117    so a constant passed as OP0 is checked in the same way.

        In order, the function tries: the trivial multipliers 0, 1 and -1;
        a single left shift for a power-of-two coefficient; a shift/add
        sequence when choose_mult_variant accepts one within the cost of
        the MULT rtx; x + x for a floating-point multiply by 2.0; and
        finally a multiply through expand_binop, whose result is asserted
        to be nonnull.  */
3118
3119 rtx
3120 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3121              int unsignedp)
3122 {
3123   enum mult_variant variant;
3124   struct algorithm algorithm;
3125   rtx scalar_op1;
3126   int max_cost;
3127   bool speed = optimize_insn_for_speed_p ();
3128   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3129
       /* Canonicalize: keep a constant operand, if there is one, in OP1.  */
3130   if (CONSTANT_P (op0))
3131     std::swap (op0, op1);
3132
3133   /* For vectors, there are several simplifications that can be made if
3134      all elements of the vector constant are identical.  */
3135   scalar_op1 = op1;
3136   if (GET_CODE (op1) == CONST_VECTOR)
3137     {
3138       int i, n = CONST_VECTOR_NUNITS (op1);
3139       scalar_op1 = CONST_VECTOR_ELT (op1, 0);
           /* One element that differs from element 0 rules out every
              constant-based shortcut below; go straight to the generic
              multiply.  */
3140       for (i = 1; i < n; ++i)
3141         if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3142           goto skip_scalar;
3143     }
3144
3145   if (INTEGRAL_MODE_P (mode))
3146     {
3147       rtx fake_reg;
3148       HOST_WIDE_INT coeff;
3149       bool is_neg;
3150       int mode_bitsize;
3151
           /* Multiplying by 0, 1 or -1 needs no multiply at all.  */
3152       if (op1 == CONST0_RTX (mode))
3153         return op1;
3154       if (op1 == CONST1_RTX (mode))
3155         return op0;
3156       if (op1 == CONSTM1_RTX (mode))
3157         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3158                             op0, target, 0);
3159
           /* When trapping arithmetic is wanted, do not synthesize the
              multiply from shifts and adds.  */
3160       if (do_trapv)
3161         goto skip_synth;
3162
3163       /* If mode is integer vector mode, check if the backend supports
3164          vector lshift (by scalar or vector) at all.  If not, we can't use
3165          synthesized multiply.  */
3166       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3167           && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3168           && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3169         goto skip_synth;
3170
3171       /* These are the operations that are potentially turned into
3172          a sequence of shifts and additions.  */
3173       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3174
3175       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3176          less than or equal in size to `unsigned int' this doesn't matter.
3177          If the mode is larger than `unsigned int', then synth_mult works
3178          only if the constant value exactly fits in an `unsigned int' without
3179          any truncation.  This means that multiplying by negative values does
3180          not work; results are off by 2^32 on a 32 bit machine.  */
3181       if (CONST_INT_P (scalar_op1))
3182         {
3183           coeff = INTVAL (scalar_op1);
3184           is_neg = coeff < 0;
3185         }
           /* A constant too wide for a CONST_INT is only handled when it is
              an exact power of two; anything else goes to the generic
              multiply.  */
3186 #if TARGET_SUPPORTS_WIDE_INT
3187       else if (CONST_WIDE_INT_P (scalar_op1))
3188 #else
3189       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3190 #endif
3191         {
3192           int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3193           /* Perfect power of 2 (other than 1, which is handled above).  */
3194           if (shift > 0)
3195             return expand_shift (LSHIFT_EXPR, mode, op0,
3196                                  shift, target, unsignedp);
3197           else
3198             goto skip_synth;
3199         }
3200       else
3201         goto skip_synth;
3202
3203       /* We used to test optimize here, on the grounds that it's better to
3204          produce a smaller program when -O is not used.  But this causes
3205          such a terrible slowdown sometimes that it seems better to always
3206          use synth_mult.  */
3207
3208       /* Special case powers of two.  A negative COEFF in a mode wider
              than a HOST_WIDE_INT is excluded here and handled by the
              multiply-then-negate path below.  */
3209       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3210           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3211         return expand_shift (LSHIFT_EXPR, mode, op0,
3212                              floor_log2 (coeff), target, unsignedp);
3213
           /* FAKE_REG only stands in for OP0 inside the MULT rtx that is
              handed to set_src_cost; it is never emitted.  */
3214       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3215
3216       /* Attempt to handle multiplication of DImode values by negative
3217          coefficients, by performing the multiplication by a positive
3218          multiplier and then inverting the result.  */
3219       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3220         {
3221           /* It's safe to use -coeff even for INT_MIN, as the
3222              result is interpreted as an unsigned coefficient.
3223              Exclude cost of op0 from max_cost to match the cost
3224              calculation of the synth_mult.  */
3225           coeff = -(unsigned HOST_WIDE_INT) coeff;
               /* The final negation is paid for out of the budget; if
                  nothing is left, synthesis cannot win.  */
3226           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3227                       - neg_cost (speed, mode));
3228           if (max_cost <= 0)
3229             goto skip_synth;
3230
3231           /* Special case powers of two.  */
3232           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3233             {
3234               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3235                                        floor_log2 (coeff), target, unsignedp);
3236               return expand_unop (mode, neg_optab, temp, target, 0);
3237             }
3238
3239           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3240                                    max_cost))
3241             {
3242               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3243                                             &algorithm, variant);
3244               return expand_unop (mode, neg_optab, temp, target, 0);
3245             }
3246           goto skip_synth;
3247         }
3248
3249       /* Exclude cost of op0 from max_cost to match the cost
3250          calculation of the synth_mult.  */
3251       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3252       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3253         return expand_mult_const (mode, op0, coeff, target,
3254                                   &algorithm, variant);
3255     }
      /* Reached when synthesis was not attempted, or was rejected by
         choose_mult_variant, or MODE is not an integral mode.  */
3256  skip_synth:
3257
3258   /* Expand x*2.0 as x+x.  */
3259   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3260     {
3261       REAL_VALUE_TYPE d;
3262       REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3263
3264       if (REAL_VALUES_EQUAL (d, dconst2))
3265         {
               /* OP0 is used twice, so put it in a register first.  */
3266           op0 = force_reg (GET_MODE (op0), op0);
3267           return expand_binop (mode, add_optab, op0, op0,
3268                                target, unsignedp, OPTAB_LIB_WIDEN);
3269         }
3270     }
      /* Reached directly for a vector constant whose elements differ.  */
3271  skip_scalar:
3272
3273   /* This used to use umul_optab if unsigned, but for non-widening multiply
3274      there is no difference between signed and unsigned.  */
3275   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3276                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3277   gcc_assert (op0);
3278   return op0;
3279 }
3280
3281 /* Return a cost estimate for multiplying a register by the given
3282    COEFFicient in the given MODE and SPEED.

        The upper bound is the set_src_cost of a MULT of two registers in
        MODE.  If choose_mult_variant finds an algorithm within that
        bound, that algorithm's cost is returned; otherwise the bound
        itself is returned.  */
3283
3284 int
3285 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3286 {
3287   int max_cost;
3288   struct algorithm algorithm;
3289   enum mult_variant variant;
3290
       /* FAKE_REG is only an operand for the MULT rtx being costed.  */
3291   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3292   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3293   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3294     return algorithm.cost.cost;
3295   else
3296     return max_cost;
3297 }
3298
3299 /* Perform a widening multiplication and return an rtx for the result.
3300    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3301    TARGET is a suggestion for where to store the result (an rtx).
3302    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3303    or smul_widen_optab.
        UNSIGNEDP is passed to expand_shift and expand_binop; the way OP0 and
        a constant OP1 are extended to MODE is chosen from THIS_OPTAB
        (zero-extension request iff THIS_OPTAB == umul_widen_optab).
3304
3305    We check specially for a constant integer as OP1, comparing the
3306    cost of a widening multiply against the cost of a sequence of shifts
3307    and adds.  A zero constant yields CONST0_RTX (MODE) and a power of
        two yields a single shift of the widened OP0.  */
3308
3309 rtx
3310 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3311                       int unsignedp, optab this_optab)
3312 {
3313   bool speed = optimize_insn_for_speed_p ();
3314   rtx cop1;
3315
       /* The constant path needs OP0 to carry a mode (so OP1 can be
          converted from it), the converted constant COP1 to still be a
          CONST_INT, and COP1 to be non-negative unless MODE satisfies
          HWI_COMPUTABLE_MODE_P.  */
3316   if (CONST_INT_P (op1)
3317       && GET_MODE (op0) != VOIDmode
3318       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3319                                 this_optab == umul_widen_optab))
3320       && CONST_INT_P (cop1)
3321       && (INTVAL (cop1) >= 0
3322           || HWI_COMPUTABLE_MODE_P (mode)))
3323     {
3324       HOST_WIDE_INT coeff = INTVAL (cop1);
3325       int max_cost;
3326       enum mult_variant variant;
3327       struct algorithm algorithm;
3328
3329       if (coeff == 0)
3330         return CONST0_RTX (mode);
3331
3332       /* Special case powers of two.  */
3333       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3334         {
3335           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3336           return expand_shift (LSHIFT_EXPR, mode, op0,
3337                                floor_log2 (coeff), target, unsignedp);
3338         }
3339
3340       /* Exclude cost of op0 from max_cost to match the cost
3341          calculation of the synth_mult.  */
3342       max_cost = mul_widen_cost (speed, mode);
3343       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3344                                max_cost))
3345         {
               /* The shift/add sequence works in MODE, so widen OP0 first.  */
3346           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3347           return expand_mult_const (mode, op0, coeff, target,
3348                                     &algorithm, variant);
3349         }
3350     }
       /* No constant shortcut applied: use the widening multiply optab.  */
3351   return expand_binop (mode, this_optab, op0, op1, target,
3352                        unsignedp, OPTAB_LIB_WIDEN);
3353 }
3354 \f
3355 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3356    replace division by D, and put the least significant N bits of the result
3357    in *MULTIPLIER_PTR and return the most significant bit.
3358
3359    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3360    needed precision is in PRECISION (should be <= N).
3361
3362    PRECISION should be as small as possible so this function can choose
3363    multiplier more freely.
3364
3365    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3366    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3367
3368    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3369    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.
        NOTE(review): the value computed here approximates
        2^(N + *POST_SHIFT_PTR) / D, so the shift above is presumably
        meant to apply to the high part of the product -- confirm against
        the callers.

        The return value is bit N of the chosen multiplier: nonzero exactly
        when the multiplier does not fit in N bits.  ceil_log2 (D) must not
        exceed N (asserted).  */
3370
3371 unsigned HOST_WIDE_INT
3372 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3373                    unsigned HOST_WIDE_INT *multiplier_ptr,
3374                    int *post_shift_ptr, int *lgup_ptr)
3375 {
3376   int lgup, post_shift;
3377   int pow, pow2;
3378
3379   /* lgup = ceil(log2(divisor)); */
3380   lgup = ceil_log2 (d);
3381
3382   gcc_assert (lgup <= n);
3383
3384   pow = n + lgup;
3385   pow2 = n + lgup - precision;
3386
3387   /* mlow = 2^(N + lgup)/d */
3388   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3389   wide_int mlow = wi::udiv_trunc (val, d);
3390
3391   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3392   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3393   wide_int mhigh = wi::udiv_trunc (val, d);
3394
3395   /* If precision == N, then mlow, mhigh exceed 2^N
3396      (but they do not exceed 2^(N+1)).  */
3397
3398   /* Reduce to lowest terms: halve both bounds, and drop one post-shift,
          for as long as the halved bounds still differ.  */
3399   for (post_shift = lgup; post_shift > 0; post_shift--)
3400     {
3401       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3402                                                        HOST_BITS_PER_WIDE_INT);
3403       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3404                                                        HOST_BITS_PER_WIDE_INT);
3405       if (ml_lo >= mh_lo)
3406         break;
3407
3408       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3409       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3410     }
3411
3412   *post_shift_ptr = post_shift;
3413   *lgup_ptr = lgup;
3414   if (n < HOST_BITS_PER_WIDE_INT)
3415     {
3416       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3417       *multiplier_ptr = mhigh.to_uhwi () & mask;
           /* Bit N is set only when mhigh is strictly greater than the
              N-bit mask.  A multiplier equal to the mask (2^N - 1) still
              fits in N bits and must report 0; e.g. D = 129 with
              N = PRECISION = 8 ends with mhigh == 255.  */
3418       return mhigh.to_uhwi () > mask;
3419     }
3420   else
3421     {
           /* N is the full host width: the low word is the multiplier and
              the bit just above it is the return value.  */
3422       *multiplier_ptr = mhigh.to_uhwi ();
3423       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3424     }
3425 }
3426
3427 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3428    congruent to 1 (mod 2**N).  X must be odd; the result is reduced to
        the low N bits whenever at least one iteration runs (N > 3),
        otherwise X itself is returned unmasked.  */
3429
3430 static unsigned HOST_WIDE_INT
3431 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3432 {
3433   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3434
3435   /* The algorithm notes that the choice y = x satisfies
3436      x*y == 1 mod 2^3, since x is assumed odd.
3437      Each iteration doubles the number of bits of significance in y.  */
3438
3439   unsigned HOST_WIDE_INT mask;
3440   unsigned HOST_WIDE_INT y = x;
3441   int nbit = 3;
3442
       /* Low-N-bit mask.  N equal to the host width is special-cased so
          that the shift count never reaches the width of the type.  */
3443   mask = (n == HOST_BITS_PER_WIDE_INT
3444           ? ~(unsigned HOST_WIDE_INT) 0
3445           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3446
3447   while (nbit < n)
3448     {
           /* `*' binds tighter than `&', so the whole product is masked.
              The arithmetic is unsigned and therefore wraps.  */
3449       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3450       nbit *= 2;
3451     }
3452   return y;
3453 }
3454
3455 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3456    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3457    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3458    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3459    become signed.
3460
3461    The result is put in TARGET if that is convenient.
3462
3463    MODE is the mode of operation.

        The emitted code computes
          ADJ_OPERAND +/- ((OP0 >> (bits - 1)) & OP1)
                      +/- ((OP1 >> (bits - 1)) & OP0)
        with + when UNSIGNEDP is nonzero and - otherwise.  Both shifts are
        requested as signed shifts (last argument 0), so each shifted value
        is presumably all-ones for a negative operand and zero otherwise.  */
3464
3465 rtx
3466 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3467                              rtx op1, rtx target, int unsignedp)
3468 {
3469   rtx tem;
3470   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3471
       /* First correction term: OP1 masked by the sign of OP0.  */
3472   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3473                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3474   tem = expand_and (mode, tem, op1, NULL_RTX);
3475   adj_operand
3476     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3477                      adj_operand);
3478
       /* Second correction term: OP0 masked by the sign of OP1.  */
3479   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3480                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3481   tem = expand_and (mode, tem, op0, NULL_RTX);
3482   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3483                           target);
3484
3485   return target;
3486 }
3487
3488 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.
        For MODE == word_mode this is gen_highpart (MODE, OP).  Otherwise OP
        is shifted right, as an unsigned value in the next wider mode, by
        the bit size of MODE and the result is converted back to MODE.
        MODE must not be a scalar float mode (asserted on the second
        path).  */
3489
3490 static rtx
3491 extract_high_half (machine_mode mode, rtx op)
3492 {
3493   machine_mode wider_mode;
3494
3495   if (mode == word_mode)
3496     return gen_highpart (mode, op);
3497
3498   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3499
       /* Move the high half down into the low half, then narrow.  */
3500   wider_mode = GET_MODE_WIDER_MODE (mode);
3501   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3502                      GET_MODE_BITSIZE (mode), 0, 1);
3503   return convert_modes (mode, wider_mode, op, 0);
3504 }
3505
3506 /* Like expmed_mult_highpart, but only consider using a multiplication
3507    optab.  OP1 is an rtx for the constant operand.

        Five strategies are tried in order, each gated on its estimated cost
        being below MAX_COST:
          1. a highpart multiply of the requested signedness;
          2. a highpart multiply of the opposite signedness plus an
             adjustment;
          3. a widening multiply, taking the high half of the result;
          4. an ordinary multiply in the wider mode, taking the high half;
          5. a widening multiply of the opposite signedness plus an
             adjustment.
        Returns the rtx holding the result, or 0 if no strategy applied.  */
3508
3509 static rtx
3510 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3511                             rtx target, int unsignedp, int max_cost)
3512 {
       /* OP1's value re-expressed as a constant of MODE.  */
3513   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3514   machine_mode wider_mode;
3515   optab moptab;
3516   rtx tem;
3517   int size;
3518   bool speed = optimize_insn_for_speed_p ();
3519
3520   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3521
3522   wider_mode = GET_MODE_WIDER_MODE (mode);
3523   size = GET_MODE_BITSIZE (mode);
3524
3525   /* Firstly, try using a multiplication insn that only generates the needed
3526      high part of the product, and in the sign flavor of unsignedp.  */
3527   if (mul_highpart_cost (speed, mode) < max_cost)
3528     {
3529       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3530       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3531                           unsignedp, OPTAB_DIRECT);
3532       if (tem)
3533         return tem;
3534     }
3535
3536   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3537      Need to adjust the result after the multiplication.  The cost test
          budgets two shifts and four adds for the adjustment.  */
3538   if (size - 1 < BITS_PER_WORD
3539       && (mul_highpart_cost (speed, mode)
3540           + 2 * shift_cost (speed, mode, size-1)
3541           + 4 * add_cost (speed, mode) < max_cost))
3542     {
3543       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3544       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3545                           unsignedp, OPTAB_DIRECT);
3546       if (tem)
3547         /* We used the wrong signedness.  Adjust the result.  */
3548         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3549                                             tem, unsignedp);
3550     }
3551
3552   /* Try widening multiplication.  */
3553   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3554   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3555       && mul_widen_cost (speed, wider_mode) < max_cost)
3556     {
3557       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3558                           unsignedp, OPTAB_WIDEN);
3559       if (tem)
3560         return extract_high_half (mode, tem);
3561     }
3562
3563   /* Try widening the mode and perform a non-widening multiplication.  */
3564   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3565       && size - 1 < BITS_PER_WORD
3566       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3567           < max_cost))
3568     {
3569       rtx_insn *insns;
3570       rtx wop0, wop1;
3571
3572       /* We need to widen the operands, for example to ensure the
3573          constant multiplier is correctly sign or zero extended.
3574          Use a sequence to clean-up any instructions emitted by
3575          the conversions if things don't work out.  */
3576       start_sequence ();
3577       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3578       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3579       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3580                           unsignedp, OPTAB_WIDEN);
3581       insns = get_insns ();
3582       end_sequence ();
3583
           /* Only commit the collected insns when the multiply succeeded.  */
3584       if (tem)
3585         {
3586           emit_insn (insns);
3587           return extract_high_half (mode, tem);
3588         }
3589     }
3590
3591   /* Try widening multiplication of opposite signedness, and adjust.  */
3592   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3593   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3594       && size - 1 < BITS_PER_WORD
3595       && (mul_widen_cost (speed, wider_mode)
3596           + 2 * shift_cost (speed, mode, size-1)
3597           + 4 * add_cost (speed, mode) < max_cost))
3598     {
3599       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3600                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3601       if (tem != 0)
3602         {
3603           tem = extract_high_half (mode, tem);
3604           /* We used the wrong signedness.  Adjust the result.  */
3605           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3606                                               target, unsignedp);
3607         }
3608     }
3609
       /* Nothing applicable within MAX_COST.  */
3610   return 0;
3611 }
3612
3613 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3614    putting the high half of the result in TARGET if that is convenient,
3615    and return where the result is.  If the operation can not be performed,
3616    0 is returned.
3617
3618    MODE is the mode of operation and result.
3619
3620    UNSIGNEDP nonzero means unsigned multiply.
3621
3622    MAX_COST is the total allowed cost for the expanded RTL.

        When the next wider mode fits in a word, a shift/add multiply in
        that wider mode is considered first; the multiplication optabs are
        then tried with that algorithm's cost as their limit, and the
        shift/add sequence is emitted only if they all decline.  In every
        other case the work is left to expmed_mult_highpart_optab.  */
3623
3624 static rtx
3625 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3626                       rtx target, int unsignedp, int max_cost)
3627 {
3628   machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3629   unsigned HOST_WIDE_INT cnst1;
3630   int extra_cost;
3631   bool sign_adjust = false;
3632   enum mult_variant variant;
3633   struct algorithm alg;
3634   rtx tem;
3635   bool speed = optimize_insn_for_speed_p ();
3636
3637   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3638   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3639   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3640
       /* The constant, reduced to the bits of MODE and viewed as
          unsigned.  */
3641   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3642
3643   /* We can't optimize modes wider than BITS_PER_WORD.
3644      ??? We might be able to perform double-word arithmetic if
3645      mode == word_mode, however all the cost calculations in
3646      synth_mult etc. assume single-word operations.  */
3647   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3648     return expmed_mult_highpart_optab (mode, op0, op1, target,
3649                                        unsignedp, max_cost);
3650
       /* Extracting the high half costs one shift on top of the
          multiply.  */
3651   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3652
3653   /* Check whether we try to multiply by a negative constant.  */
3654   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3655     {
           /* CNST1 is used below as an unsigned coefficient, so the result
              is corrected afterwards by subtracting OP0; budget one add
              for that.  */
3656       sign_adjust = true;
3657       extra_cost += add_cost (speed, mode);
3658     }
3659
3660   /* See whether shift/add multiplication is cheap enough.  */
3661   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3662                            max_cost - extra_cost))
3663     {
3664       /* See whether the specialized multiplication optabs are
3665          cheaper than the shift/add version.  */
3666       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3667                                         alg.cost.cost + extra_cost);
3668       if (tem)
3669         return tem;
3670
           /* Widen OP0, multiply by the constant with shifts and adds in
              the wider mode, and keep the high half.  */
3671       tem = convert_to_mode (wider_mode, op0, unsignedp);
3672       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3673       tem = extract_high_half (mode, tem);
3674
3675       /* Adjust result for signedness.  */
3676       if (sign_adjust)
3677         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3678
3679       return tem;
3680     }
3681   return expmed_mult_highpart_optab (mode, op0, op1, target,
3682                                      unsignedp, max_cost);
3683 }
3684
3685
3686 /* Expand signed modulus of OP0 by a power of two D in mode MODE.
        Returns an rtx holding the remainder.

        When optimizing for speed and BRANCH_COST is at least 2, a
        branch-free sequence built around a store-flag of (OP0 < 0) is
        tried first.  If that is not wanted, or the store-flag cannot be
        emitted, a compare-and-jump sequence is emitted instead.  */
3687
3688 static rtx
3689 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3690 {
3691   rtx result, temp, shift;
3692   rtx_code_label *label;
3693   int logd;
3694   int prec = GET_MODE_PRECISION (mode);
3695
3696   logd = floor_log2 (d);
3697   result = gen_reg_rtx (mode);
3698
3699   /* Avoid conditional branches when they're expensive.  */
3700   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3701       && optimize_insn_for_speed_p ())
3702     {
           /* SIGNMASK is requested with normalization -1, i.e. presumably
              all-ones when OP0 is negative and zero otherwise.  A null
              result means the store-flag could not be emitted.  */
3703       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3704                                       mode, 0, -1);
3705       if (signmask)
3706         {
3707           HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3708           signmask = force_reg (mode, signmask);
3709           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3710
3711           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3712              which instruction sequence to use.  If logical right shifts
3713              are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3714              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3715
               /* This rtx is built only to be costed; TEMP is overwritten
                  in both arms below.  */
3716           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3717           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3718               || (set_src_cost (temp, optimize_insn_for_speed_p ())
3719                   > COSTS_N_INSNS (2)))
3720             {
                   /* ((((OP0 ^ S) - S) & MASKLOW) ^ S) - S, with S being
                      SIGNMASK.  */
3721               temp = expand_binop (mode, xor_optab, op0, signmask,
3722                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3723               temp = expand_binop (mode, sub_optab, temp, signmask,
3724                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3725               temp = expand_binop (mode, and_optab, temp,
3726                                    gen_int_mode (masklow, mode),
3727                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3728               temp = expand_binop (mode, xor_optab, temp, signmask,
3729                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3730               temp = expand_binop (mode, sub_optab, temp, signmask,
3731                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3732             }
3733           else
3734             {
                   /* ((OP0 + B) & MASKLOW) - B, with B being SIGNMASK
                      shifted right logically by SHIFT.  */
3735               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3736                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3737               signmask = force_reg (mode, signmask);
3738
3739               temp = expand_binop (mode, add_optab, op0, signmask,
3740                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3741               temp = expand_binop (mode, and_optab, temp,
3742                                    gen_int_mode (masklow, mode),
3743                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3744               temp = expand_binop (mode, sub_optab, temp, signmask,
3745                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3746             }
3747           return temp;
3748         }
3749     }
3750
3751   /* Mask contains the mode's signbit and the significant bits of the
3752      modulus.  By including the signbit in the operation, many targets
3753      can avoid an explicit compare operation in the following comparison
3754      against zero.  */
3755   wide_int mask = wi::mask (logd, false, prec);
3756   mask = wi::set_bit (mask, prec - 1);
3757
3758   temp = expand_binop (mode, and_optab, op0,
3759                        immed_wide_int_const (mask, mode),
3760                        result, 1, OPTAB_LIB_WIDEN);
3761   if (temp != result)
3762     emit_move_insn (result, temp);
3763
       /* A non-negative RESULT is already the answer; jump over the
          fix-up.  */
3764   label = gen_label_rtx ();
3765   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3766
       /* Fix-up for the other case: RESULT = ((RESULT - 1) | MASK) + 1,
          where MASK is now the inverted low-LOGD-bit mask (presumably all
          bits from LOGD upwards).  */
3767   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3768                        0, OPTAB_LIB_WIDEN);
3769
3770   mask = wi::mask (logd, true, prec);
3771   temp = expand_binop (mode, ior_optab, temp,
3772                        immed_wide_int_const (mask, mode),
3773                        result, 1, OPTAB_LIB_WIDEN);
3774   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3775                        0, OPTAB_LIB_WIDEN);
3776   if (temp != result)
3777     emit_move_insn (result, temp);
3778   emit_label (label);
3779   return result;
3780 }
3781
3782 /* Expand signed division of OP0 by a power of two D in mode MODE.
3783    This routine is only called for positive values of D.

        Returns an rtx holding the quotient.  Every strategy adds a bias to
        a negative OP0 and then shifts right by log2 (D) with a signed
        shift.  The strategies differ in how the bias is produced and are
        tried in this order:
          - D == 2 and BRANCH_COST >= 1: add the 0/1 store-flag of
            (OP0 < 0);
          - conditional moves available and BRANCH_COST >= 2: select
            between OP0 and OP0 + (D - 1);
          - BRANCH_COST >= 2: derive the bias from a 0/-1 store-flag of
            (OP0 < 0), either by masking with D - 1 or by a logical right
            shift;
          - otherwise: compare-and-jump around an increment by D - 1.
        emit_store_flag and emit_conditional_move may fail; in that case
        the next strategy is tried, the last one being unconditional.  */
3784
3785 static rtx
3786 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3787 {
3788   rtx temp;
3789   rtx_code_label *label;
3790   int logd;
3791
3792   logd = floor_log2 (d);
3793
3794   if (d == 2
3795       && BRANCH_COST (optimize_insn_for_speed_p (),
3796                       false) >= 1)
3797     {
3798       temp = gen_reg_rtx (mode);
3799       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
           /* The store-flag can fail (null result); only use it when it
              was actually emitted, otherwise fall through.  */
           if (temp != NULL_RTX)
             {
3800           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3801                                0, OPTAB_LIB_WIDEN);
3802           return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
             }
3803     }
3804
3805   if (HAVE_conditional_move
3806       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3807     {
3808       rtx temp2;
3809
           /* Collect the insns in a sequence so they can be dropped if the
              conditional move cannot be emitted.  */
3810       start_sequence ();
3811       temp2 = copy_to_mode_reg (mode, op0);
3812       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3813                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3814       temp = force_reg (mode, temp);
3815
3816       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3817       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3818                                      mode, temp, temp2, mode, 0);
3819       if (temp2)
3820         {
3821           rtx_insn *seq = get_insns ();
3822           end_sequence ();
3823           emit_insn (seq);
3824           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3825         }
3826       end_sequence ();
3827     }
3828
3829   if (BRANCH_COST (optimize_insn_for_speed_p (),
3830                    false) >= 2)
3831     {
3832       int ushift = GET_MODE_BITSIZE (mode) - logd;
3833
3834       temp = gen_reg_rtx (mode);
3835       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
           /* As above: a failed store-flag must not be fed to the
              expanders below; fall through to the branching sequence.  */
           if (temp != NULL_RTX)
             {
3836           if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3837               || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3838                  > COSTS_N_INSNS (1))
3839             temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3840                                  NULL_RTX, 0, OPTAB_LIB_WIDEN);
3841           else
3842             temp = expand_shift (RSHIFT_EXPR, mode, temp,
3843                                  ushift, NULL_RTX, 1);
3844           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3845                                0, OPTAB_LIB_WIDEN);
3846           return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
             }
3847     }
3848
       /* Branching fallback: skip the increment when the copy of OP0 is
          non-negative.  */
3849   label = gen_label_rtx ();
3850   temp = copy_to_mode_reg (mode, op0);
3851   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3852   expand_inc (temp, gen_int_mode (d - 1, mode));
3853   emit_label (label);
3854   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3855 }
3856 \f
3857 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3858    if that is convenient, and returning where the result is.
3859    You may request either the quotient or the remainder as the result;
3860    specify REM_FLAG nonzero to get the remainder.
3861
3862    CODE is the expression code for which kind of division this is;
3863    it controls how rounding is done.  MODE is the machine mode to use.
3864    UNSIGNEDP nonzero means do unsigned division.  */
3865
3866 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3867    and then correct it by or'ing in missing high bits
3868    if result of ANDI is nonzero.
3869    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3870    This could optimize to a bfexts instruction.
3871    But C doesn't use these operations, so their optimizations are
3872    left for later.  */
3873 /* ??? For modulo, we don't actually need the highpart of the first product,
3874    the low part will do nicely.  And for small divisors, the second multiply
3875    can also be a low-part only multiply or even be completely left out.
3876    E.g. to calculate the remainder of a division by 3 with a 32 bit
3877    multiply, multiply with 0x55555556 and extract the upper two bits;
3878    the result is exact for inputs up to 0x1fffffff.
3879    The input range can be reduced by using cross-sum rules.
3880    For odd divisors >= 3, the following table gives right shift counts
3881    so that if a number is shifted by an integer multiple of the given
3882    amount, the remainder stays the same:
3883    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3884    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3885    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3886    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3887    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3888
3889    Cross-sum rules for even numbers can be derived by leaving as many bits
3890    to the right alone as the divisor has zeros to the right.
3891    E.g. if x is an unsigned 32 bit number:
3892    (x mod 12) == ((((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2) * 3) >> 28
3893    */
3894
3895 rtx
3896 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3897                rtx op0, rtx op1, rtx target, int unsignedp)
3898 {
3899   machine_mode compute_mode;
3900   rtx tquotient;
3901   rtx quotient = 0, remainder = 0;
3902   rtx_insn *last;
3903   int size;
3904   rtx_insn *insn;
3905   optab optab1, optab2;
3906   int op1_is_constant, op1_is_pow2 = 0;
3907   int max_cost, extra_cost;
3908   static HOST_WIDE_INT last_div_const = 0;
3909   bool speed = optimize_insn_for_speed_p ();
3910
3911   op1_is_constant = CONST_INT_P (op1);
3912   if (op1_is_constant)
3913     {
3914       unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3915       if (unsignedp)
3916         ext_op1 &= GET_MODE_MASK (mode);
3917       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3918                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3919     }
3920
3921   /*
3922      This is the structure of expand_divmod:
3923
3924      First comes code to fix up the operands so we can perform the operations
3925      correctly and efficiently.
3926
3927      Second comes a switch statement with code specific for each rounding mode.
3928      For some special operands this code emits all RTL for the desired
3929      operation, for other cases, it generates only a quotient and stores it in
3930      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3931      to indicate that it has not done anything.
3932
3933      Last comes code that finishes the operation.  If QUOTIENT is set and
3934      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3935      QUOTIENT is not set, it is computed using trunc rounding.
3936
3937      We try to generate special code for division and remainder when OP1 is a
3938      constant.  If |OP1| = 2**n we can use shifts and some other fast
3939      operations.  For other values of OP1, we compute a carefully selected
3940      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3941      by m.
3942
3943      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3944      half of the product.  Different strategies for generating the product are
3945      implemented in expmed_mult_highpart.
3946
3947      If what we actually want is the remainder, we generate that by another
3948      by-constant multiplication and a subtraction.  */
3949
3950   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3951      code below will malfunction if we are, so check here and handle
3952      the special case if so.  */
3953   if (op1 == const1_rtx)
3954     return rem_flag ? const0_rtx : op0;
3955
3956     /* When dividing by -1, we could get an overflow.
3957      negv_optab can handle overflows.  */
3958   if (! unsignedp && op1 == constm1_rtx)
3959     {
3960       if (rem_flag)
3961         return const0_rtx;
3962       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3963                           ? negv_optab : neg_optab, op0, target, 0);
3964     }
3965
3966   if (target
3967       /* Don't use the function value register as a target
3968          since we have to read it as well as write it,
3969          and function-inlining gets confused by this.  */
3970       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3971           /* Don't clobber an operand while doing a multi-step calculation.  */
3972           || ((rem_flag || op1_is_constant)
3973               && (reg_mentioned_p (target, op0)
3974                   || (MEM_P (op0) && MEM_P (target))))
3975           || reg_mentioned_p (target, op1)
3976           || (MEM_P (op1) && MEM_P (target))))
3977     target = 0;
3978
3979   /* Get the mode in which to perform this computation.  Normally it will
3980      be MODE, but sometimes we can't do the desired operation in MODE.
3981      If so, pick a wider mode in which we can do the operation.  Convert
3982      to that mode at the start to avoid repeated conversions.
3983
3984      First see what operations we need.  These depend on the expression
3985      we are evaluating.  (We assume that divxx3 insns exist under the
3986      same conditions that modxx3 insns and that these insns don't normally
3987      fail.  If these assumptions are not correct, we may generate less
3988      efficient code in some cases.)
3989
3990      Then see if we find a mode in which we can open-code that operation
3991      (either a division, modulus, or shift).  Finally, check for the smallest
3992      mode for which we can do the operation with a library call.  */
3993
3994   /* We might want to refine this now that we have division-by-constant
3995      optimization.  Since expmed_mult_highpart tries so many variants, it is
3996      not straightforward to generalize this.  Maybe we should make an array
3997      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3998
3999   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
4000             ? (unsignedp ? lshr_optab : ashr_optab)
4001             : (unsignedp ? udiv_optab : sdiv_optab));
4002   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
4003             ? optab1
4004             : (unsignedp ? udivmod_optab : sdivmod_optab));
4005
4006   for (compute_mode = mode; compute_mode != VOIDmode;
4007        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4008     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4009         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4010       break;
4011
4012   if (compute_mode == VOIDmode)
4013     for (compute_mode = mode; compute_mode != VOIDmode;
4014          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4015       if (optab_libfunc (optab1, compute_mode)
4016           || optab_libfunc (optab2, compute_mode))
4017         break;
4018
4019   /* If we still couldn't find a mode, use MODE, but expand_binop will
4020      probably die.  */
4021   if (compute_mode == VOIDmode)
4022     compute_mode = mode;
4023
4024   if (target && GET_MODE (target) == compute_mode)
4025     tquotient = target;
4026   else
4027     tquotient = gen_reg_rtx (compute_mode);
4028
4029   size = GET_MODE_BITSIZE (compute_mode);
4030 #if 0
4031   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4032      (mode), and thereby get better code when OP1 is a constant.  Do that
4033      later.  It will require going over all usages of SIZE below.  */
4034   size = GET_MODE_BITSIZE (mode);
4035 #endif
4036
4037   /* Only deduct something for a REM if the last divide done was
4038      for a different constant.   Then set the constant of the last
4039      divide.  */
4040   max_cost = (unsignedp
4041               ? udiv_cost (speed, compute_mode)
4042               : sdiv_cost (speed, compute_mode));
4043   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4044                      && INTVAL (op1) == last_div_const))
4045     max_cost -= (mul_cost (speed, compute_mode)
4046                  + add_cost (speed, compute_mode));
4047
4048   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4049
4050   /* Now convert to the best mode to use.  */
4051   if (compute_mode != mode)
4052     {
4053       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4054       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4055
4056       /* convert_modes may have placed op1 into a register, so we
4057          must recompute the following.  */
4058       op1_is_constant = CONST_INT_P (op1);
4059       op1_is_pow2 = (op1_is_constant
4060                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4061                           || (! unsignedp
4062                               && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4063     }
4064
4065   /* If one of the operands is a volatile MEM, copy it into a register.  */
4066
4067   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4068     op0 = force_reg (compute_mode, op0);
4069   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4070     op1 = force_reg (compute_mode, op1);
4071
4072   /* If we need the remainder or if OP1 is constant, we need to
4073      put OP0 in a register in case it has any queued subexpressions.  */
4074   if (rem_flag || op1_is_constant)
4075     op0 = force_reg (compute_mode, op0);
4076
4077   last = get_last_insn ();
4078
4079   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4080   if (unsignedp)
4081     {
4082       if (code == FLOOR_DIV_EXPR)
4083         code = TRUNC_DIV_EXPR;
4084       if (code == FLOOR_MOD_EXPR)
4085         code = TRUNC_MOD_EXPR;
4086       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4087         code = TRUNC_DIV_EXPR;
4088     }
4089
4090   if (op1 != const0_rtx)
4091     switch (code)
4092       {
4093       case TRUNC_MOD_EXPR:
4094       case TRUNC_DIV_EXPR:
4095         if (op1_is_constant)
4096           {
4097             if (unsignedp)
4098               {
4099                 unsigned HOST_WIDE_INT mh, ml;
4100                 int pre_shift, post_shift;
4101                 int dummy;
4102                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4103                                             & GET_MODE_MASK (compute_mode));
4104
4105                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4106                   {
4107                     pre_shift = floor_log2 (d);
4108                     if (rem_flag)
4109                       {
4110                         unsigned HOST_WIDE_INT mask
4111                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4112                         remainder
4113                           = expand_binop (compute_mode, and_optab, op0,
4114                                           gen_int_mode (mask, compute_mode),
4115                                           remainder, 1,
4116                                           OPTAB_LIB_WIDEN);
4117                         if (remainder)
4118                           return gen_lowpart (mode, remainder);
4119                       }
4120                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4121                                              pre_shift, tquotient, 1);
4122                   }
4123                 else if (size <= HOST_BITS_PER_WIDE_INT)
4124                   {
4125                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4126                       {
4127                         /* Most significant bit of divisor is set; emit an scc
4128                            insn.  */
4129                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4130                                                           compute_mode, 1, 1);
4131                       }
4132                     else
4133                       {
4134                         /* Find a suitable multiplier and right shift count
4135                            instead of multiplying with D.  */
4136
4137                         mh = choose_multiplier (d, size, size,
4138                                                 &ml, &post_shift, &dummy);
4139
4140                         /* If the suggested multiplier is more than SIZE bits,
4141                            we can do better for even divisors, using an
4142                            initial right shift.  */
4143                         if (mh != 0 && (d & 1) == 0)
4144                           {
4145                             pre_shift = floor_log2 (d & -d);
4146                             mh = choose_multiplier (d >> pre_shift, size,
4147                                                     size - pre_shift,
4148                                                     &ml, &post_shift, &dummy);
4149                             gcc_assert (!mh);
4150                           }
4151                         else
4152                           pre_shift = 0;
4153
4154                         if (mh != 0)
4155                           {
4156                             rtx t1, t2, t3, t4;
4157
4158                             if (post_shift - 1 >= BITS_PER_WORD)
4159                               goto fail1;
4160
4161                             extra_cost
4162                               = (shift_cost (speed, compute_mode, post_shift - 1)
4163                                  + shift_cost (speed, compute_mode, 1)
4164                                  + 2 * add_cost (speed, compute_mode));
4165                             t1 = expmed_mult_highpart
4166                               (compute_mode, op0,
4167                                gen_int_mode (ml, compute_mode),
4168                                NULL_RTX, 1, max_cost - extra_cost);
4169                             if (t1 == 0)
4170                               goto fail1;
4171                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4172                                                                op0, t1),
4173                                                 NULL_RTX);
4174                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4175                                                t2, 1, NULL_RTX, 1);
4176                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4177                                                               t1, t3),
4178                                                 NULL_RTX);
4179                             quotient = expand_shift
4180                               (RSHIFT_EXPR, compute_mode, t4,
4181                                post_shift - 1, tquotient, 1);
4182                           }
4183                         else
4184                           {
4185                             rtx t1, t2;
4186
4187                             if (pre_shift >= BITS_PER_WORD
4188                                 || post_shift >= BITS_PER_WORD)
4189                               goto fail1;
4190
4191                             t1 = expand_shift
4192                               (RSHIFT_EXPR, compute_mode, op0,
4193                                pre_shift, NULL_RTX, 1);
4194                             extra_cost
4195                               = (shift_cost (speed, compute_mode, pre_shift)
4196                                  + shift_cost (speed, compute_mode, post_shift));
4197                             t2 = expmed_mult_highpart
4198                               (compute_mode, t1,
4199                                gen_int_mode (ml, compute_mode),
4200                                NULL_RTX, 1, max_cost - extra_cost);
4201                             if (t2 == 0)
4202                               goto fail1;
4203                             quotient = expand_shift
4204                               (RSHIFT_EXPR, compute_mode, t2,
4205                                post_shift, tquotient, 1);
4206                           }
4207                       }
4208                   }
4209                 else            /* Too wide mode to use tricky code */
4210                   break;
4211
4212                 insn = get_last_insn ();
4213                 if (insn != last)
4214                   set_dst_reg_note (insn, REG_EQUAL,
4215                                     gen_rtx_UDIV (compute_mode, op0, op1),
4216                                     quotient);
4217               }
4218             else                /* TRUNC_DIV, signed */
4219               {
4220                 unsigned HOST_WIDE_INT ml;
4221                 int lgup, post_shift;
4222                 rtx mlr;
4223                 HOST_WIDE_INT d = INTVAL (op1);
4224                 unsigned HOST_WIDE_INT abs_d;
4225
4226                 /* Since d might be INT_MIN, we have to cast to
4227                    unsigned HOST_WIDE_INT before negating to avoid
4228                    undefined signed overflow.  */
4229                 abs_d = (d >= 0
4230                          ? (unsigned HOST_WIDE_INT) d
4231                          : - (unsigned HOST_WIDE_INT) d);
4232
4233                 /* n rem d = n rem -d */
4234                 if (rem_flag && d < 0)
4235                   {
4236                     d = abs_d;
4237                     op1 = gen_int_mode (abs_d, compute_mode);
4238                   }
4239
4240                 if (d == 1)
4241                   quotient = op0;
4242                 else if (d == -1)
4243                   quotient = expand_unop (compute_mode, neg_optab, op0,
4244                                           tquotient, 0);
4245                 else if (HOST_BITS_PER_WIDE_INT >= size
4246                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4247                   {
4248                     /* This case is not handled correctly below.  */
4249                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4250                                                 compute_mode, 1, 1);
4251                     if (quotient == 0)
4252                       goto fail1;
4253                   }
4254                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4255                          && (rem_flag
4256                              ? smod_pow2_cheap (speed, compute_mode)
4257                              : sdiv_pow2_cheap (speed, compute_mode))
4258                          /* We assume that cheap metric is true if the
4259                             optab has an expander for this mode.  */
4260                          && ((optab_handler ((rem_flag ? smod_optab
4261                                               : sdiv_optab),
4262                                              compute_mode)
4263                               != CODE_FOR_nothing)
4264                              || (optab_handler (sdivmod_optab,
4265                                                 compute_mode)
4266                                  != CODE_FOR_nothing)))
4267                   ;
4268                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4269                   {
4270                     if (rem_flag)
4271                       {
4272                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4273                         if (remainder)
4274                           return gen_lowpart (mode, remainder);
4275                       }
4276
4277                     if (sdiv_pow2_cheap (speed, compute_mode)
4278                         && ((optab_handler (sdiv_optab, compute_mode)
4279                              != CODE_FOR_nothing)
4280                             || (optab_handler (sdivmod_optab, compute_mode)
4281                                 != CODE_FOR_nothing)))
4282                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4283                                                 compute_mode, op0,
4284                                                 gen_int_mode (abs_d,
4285                                                               compute_mode),
4286                                                 NULL_RTX, 0);
4287                     else
4288                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4289
4290                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4291                        negate the quotient.  */
4292                     if (d < 0)
4293                       {
4294                         insn = get_last_insn ();
4295                         if (insn != last
4296                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4297                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4298                           set_dst_reg_note (insn, REG_EQUAL,
4299                                             gen_rtx_DIV (compute_mode, op0,
4300                                                          gen_int_mode
4301                                                            (abs_d,
4302                                                             compute_mode)),
4303                                             quotient);
4304
4305                         quotient = expand_unop (compute_mode, neg_optab,
4306                                                 quotient, quotient, 0);
4307                       }
4308                   }
4309                 else if (size <= HOST_BITS_PER_WIDE_INT)
4310                   {
4311                     choose_multiplier (abs_d, size, size - 1,
4312                                        &ml, &post_shift, &lgup);
4313                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4314                       {
4315                         rtx t1, t2, t3;
4316
4317                         if (post_shift >= BITS_PER_WORD
4318                             || size - 1 >= BITS_PER_WORD)
4319                           goto fail1;
4320
4321                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4322                                       + shift_cost (speed, compute_mode, size - 1)
4323                                       + add_cost (speed, compute_mode));
4324                         t1 = expmed_mult_highpart
4325                           (compute_mode, op0, gen_int_mode (ml, compute_mode),
4326                            NULL_RTX, 0, max_cost - extra_cost);
4327                         if (t1 == 0)
4328                           goto fail1;
4329                         t2 = expand_shift
4330                           (RSHIFT_EXPR, compute_mode, t1,
4331                            post_shift, NULL_RTX, 0);
4332                         t3 = expand_shift
4333                           (RSHIFT_EXPR, compute_mode, op0,
4334                            size - 1, NULL_RTX, 0);
4335                         if (d < 0)
4336                           quotient
4337                             = force_operand (gen_rtx_MINUS (compute_mode,
4338                                                             t3, t2),
4339                                              tquotient);
4340                         else
4341                           quotient
4342                             = force_operand (gen_rtx_MINUS (compute_mode,
4343                                                             t2, t3),
4344                                              tquotient);
4345                       }
4346                     else
4347                       {
4348                         rtx t1, t2, t3, t4;
4349
4350                         if (post_shift >= BITS_PER_WORD
4351                             || size - 1 >= BITS_PER_WORD)
4352                           goto fail1;
4353
4354                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4355                         mlr = gen_int_mode (ml, compute_mode);
4356                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4357                                       + shift_cost (speed, compute_mode, size - 1)
4358                                       + 2 * add_cost (speed, compute_mode));
4359                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4360                                                    NULL_RTX, 0,
4361                                                    max_cost - extra_cost);
4362                         if (t1 == 0)
4363                           goto fail1;
4364                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4365                                                           t1, op0),
4366                                             NULL_RTX);
4367                         t3 = expand_shift
4368                           (RSHIFT_EXPR, compute_mode, t2,
4369                            post_shift, NULL_RTX, 0);
4370                         t4 = expand_shift
4371                           (RSHIFT_EXPR, compute_mode, op0,
4372                            size - 1, NULL_RTX, 0);
4373                         if (d < 0)
4374                           quotient
4375                             = force_operand (gen_rtx_MINUS (compute_mode,
4376                                                             t4, t3),
4377                                              tquotient);
4378                         else
4379                           quotient
4380                             = force_operand (gen_rtx_MINUS (compute_mode,
4381                                                             t3, t4),
4382                                              tquotient);
4383                       }
4384                   }
4385                 else            /* Too wide mode to use tricky code */
4386                   break;
4387
4388                 insn = get_last_insn ();
4389                 if (insn != last)
4390                   set_dst_reg_note (insn, REG_EQUAL,
4391                                     gen_rtx_DIV (compute_mode, op0, op1),
4392                                     quotient);
4393               }
4394             break;
4395           }
4396       fail1:
4397         delete_insns_since (last);
4398         break;
4399
4400       case FLOOR_DIV_EXPR:
4401       case FLOOR_MOD_EXPR:
4402       /* We will come here only for signed operations.  */
4403         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4404           {
4405             unsigned HOST_WIDE_INT mh, ml;
4406             int pre_shift, lgup, post_shift;
4407             HOST_WIDE_INT d = INTVAL (op1);
4408
4409             if (d > 0)
4410               {
4411                 /* We could just as easily deal with negative constants here,
4412                    but it does not seem worth the trouble for GCC 2.6.  */
4413                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4414                   {
4415                     pre_shift = floor_log2 (d);
4416                     if (rem_flag)
4417                       {
4418                         unsigned HOST_WIDE_INT mask
4419                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4420                         remainder = expand_binop
4421                           (compute_mode, and_optab, op0,
4422                            gen_int_mode (mask, compute_mode),
4423                            remainder, 0, OPTAB_LIB_WIDEN);
4424                         if (remainder)
4425                           return gen_lowpart (mode, remainder);
4426                       }
4427                     quotient = expand_shift
4428                       (RSHIFT_EXPR, compute_mode, op0,
4429                        pre_shift, tquotient, 0);
4430                   }
4431                 else
4432                   {
4433                     rtx t1, t2, t3, t4;
4434
4435                     mh = choose_multiplier (d, size, size - 1,
4436                                             &ml, &post_shift, &lgup);
4437                     gcc_assert (!mh);
4438
4439                     if (post_shift < BITS_PER_WORD
4440                         && size - 1 < BITS_PER_WORD)
4441                       {
4442                         t1 = expand_shift
4443                           (RSHIFT_EXPR, compute_mode, op0,
4444                            size - 1, NULL_RTX, 0);
4445                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4446                                            NULL_RTX, 0, OPTAB_WIDEN);
4447                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4448                                       + shift_cost (speed, compute_mode, size - 1)
4449                                       + 2 * add_cost (speed, compute_mode));
4450                         t3 = expmed_mult_highpart
4451                           (compute_mode, t2, gen_int_mode (ml, compute_mode),
4452                            NULL_RTX, 1, max_cost - extra_cost);
4453                         if (t3 != 0)
4454                           {
4455                             t4 = expand_shift
4456                               (RSHIFT_EXPR, compute_mode, t3,
4457                                post_shift, NULL_RTX, 1);
4458                             quotient = expand_binop (compute_mode, xor_optab,
4459                                                      t4, t1, tquotient, 0,
4460                                                      OPTAB_WIDEN);
4461                           }
4462                       }
4463                   }
4464               }
4465             else
4466               {
4467                 rtx nsign, t1, t2, t3, t4;
4468                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4469                                                   op0, constm1_rtx), NULL_RTX);
4470                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4471                                    0, OPTAB_WIDEN);
4472                 nsign = expand_shift
4473                   (RSHIFT_EXPR, compute_mode, t2,
4474                    size - 1, NULL_RTX, 0);
4475                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4476                                     NULL_RTX);
4477                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4478                                     NULL_RTX, 0);
4479                 if (t4)
4480                   {
4481                     rtx t5;
4482                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4483                                       NULL_RTX, 0);
4484                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4485                                                             t4, t5),
4486                                               tquotient);
4487                   }
4488               }
4489           }
4490
4491         if (quotient != 0)
4492           break;
4493         delete_insns_since (last);
4494
4495         /* Try using an instruction that produces both the quotient and
4496            remainder, using truncation.  We can easily compensate the quotient
4497            or remainder to get floor rounding, once we have the remainder.
4498            Notice that we compute also the final remainder value here,
4499            and return the result right away.  */
4500         if (target == 0 || GET_MODE (target) != compute_mode)
4501           target = gen_reg_rtx (compute_mode);
4502
4503         if (rem_flag)
4504           {
4505             remainder
4506               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4507             quotient = gen_reg_rtx (compute_mode);
4508           }
4509         else
4510           {
4511             quotient
4512               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4513             remainder = gen_reg_rtx (compute_mode);
4514           }
4515
4516         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4517                                  quotient, remainder, 0))
4518           {
4519             /* This could be computed with a branch-less sequence.
4520                Save that for later.  */
4521             rtx tem;
4522             rtx_code_label *label = gen_label_rtx ();
4523             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4524             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4525                                 NULL_RTX, 0, OPTAB_WIDEN);
4526             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4527             expand_dec (quotient, const1_rtx);
4528             expand_inc (remainder, op1);
4529             emit_label (label);
4530             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4531           }
4532
4533         /* No luck with division elimination or divmod.  Have to do it
4534            by conditionally adjusting op0 *and* the result.  */
4535         {
4536           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4537           rtx adjusted_op0;
4538           rtx tem;
4539
4540           quotient = gen_reg_rtx (compute_mode);
4541           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4542           label1 = gen_label_rtx ();
4543           label2 = gen_label_rtx ();
4544           label3 = gen_label_rtx ();
4545           label4 = gen_label_rtx ();
4546           label5 = gen_label_rtx ();
4547           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4548           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4549           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4550                               quotient, 0, OPTAB_LIB_WIDEN);
4551           if (tem != quotient)
4552             emit_move_insn (quotient, tem);
4553           emit_jump_insn (gen_jump (label5));
4554           emit_barrier ();
4555           emit_label (label1);
4556           expand_inc (adjusted_op0, const1_rtx);
4557           emit_jump_insn (gen_jump (label4));
4558           emit_barrier ();
4559           emit_label (label2);
4560           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4561           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4562                               quotient, 0, OPTAB_LIB_WIDEN);
4563           if (tem != quotient)
4564             emit_move_insn (quotient, tem);
4565           emit_jump_insn (gen_jump (label5));
4566           emit_barrier ();
4567           emit_label (label3);
4568           expand_dec (adjusted_op0, const1_rtx);
4569           emit_label (label4);
4570           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4571                               quotient, 0, OPTAB_LIB_WIDEN);
4572           if (tem != quotient)
4573             emit_move_insn (quotient, tem);
4574           expand_dec (quotient, const1_rtx);
4575           emit_label (label5);
4576         }
4577         break;
4578
4579       case CEIL_DIV_EXPR:
4580       case CEIL_MOD_EXPR:
4581         if (unsignedp)
4582           {
4583             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4584               {
4585                 rtx t1, t2, t3;
4586                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4587                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4588                                    floor_log2 (d), tquotient, 1);
4589                 t2 = expand_binop (compute_mode, and_optab, op0,
4590                                    gen_int_mode (d - 1, compute_mode),
4591                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4592                 t3 = gen_reg_rtx (compute_mode);
4593                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4594                                       compute_mode, 1, 1);
4595                 if (t3 == 0)
4596                   {
4597                     rtx_code_label *lab;
4598                     lab = gen_label_rtx ();
4599                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4600                     expand_inc (t1, const1_rtx);
4601                     emit_label (lab);
4602                     quotient = t1;
4603                   }
4604                 else
4605                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4606                                                           t1, t3),
4607                                             tquotient);
4608                 break;
4609               }
4610
4611             /* Try using an instruction that produces both the quotient and
4612                remainder, using truncation.  We can easily compensate the
4613                quotient or remainder to get ceiling rounding, once we have the
4614                remainder.  Notice that we compute also the final remainder
4615                value here, and return the result right away.  */
4616             if (target == 0 || GET_MODE (target) != compute_mode)
4617               target = gen_reg_rtx (compute_mode);
4618
4619             if (rem_flag)
4620               {
4621                 remainder = (REG_P (target)
4622                              ? target : gen_reg_rtx (compute_mode));
4623                 quotient = gen_reg_rtx (compute_mode);
4624               }
4625             else
4626               {
4627                 quotient = (REG_P (target)
4628                             ? target : gen_reg_rtx (compute_mode));
4629                 remainder = gen_reg_rtx (compute_mode);
4630               }
4631
4632             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4633                                      remainder, 1))
4634               {
4635                 /* This could be computed with a branch-less sequence.
4636                    Save that for later.  */
4637                 rtx_code_label *label = gen_label_rtx ();
4638                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4639                                  compute_mode, label);
4640                 expand_inc (quotient, const1_rtx);
4641                 expand_dec (remainder, op1);
4642                 emit_label (label);
4643                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4644               }
4645
4646             /* No luck with division elimination or divmod.  Have to do it
4647                by conditionally adjusting op0 *and* the result.  */
4648             {
4649               rtx_code_label *label1, *label2;
4650               rtx adjusted_op0, tem;
4651
4652               quotient = gen_reg_rtx (compute_mode);
4653               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4654               label1 = gen_label_rtx ();
4655               label2 = gen_label_rtx ();
4656               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4657                                compute_mode, label1);
4658               emit_move_insn  (quotient, const0_rtx);
4659               emit_jump_insn (gen_jump (label2));
4660               emit_barrier ();
4661               emit_label (label1);
4662               expand_dec (adjusted_op0, const1_rtx);
4663               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4664                                   quotient, 1, OPTAB_LIB_WIDEN);
4665               if (tem != quotient)
4666                 emit_move_insn (quotient, tem);
4667               expand_inc (quotient, const1_rtx);
4668               emit_label (label2);
4669             }
4670           }
4671         else /* signed */
4672           {
4673             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4674                 && INTVAL (op1) >= 0)
4675               {
4676                 /* This is extremely similar to the code for the unsigned case
4677                    above.  For 2.7 we should merge these variants, but for
4678                    2.6.1 I don't want to touch the code for unsigned since that
4679                    gets used in C.  The signed case will only be used by other
4680                    languages (Ada).  */
4681
4682                 rtx t1, t2, t3;
4683                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4684                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4685                                    floor_log2 (d), tquotient, 0);
4686                 t2 = expand_binop (compute_mode, and_optab, op0,
4687                                    gen_int_mode (d - 1, compute_mode),
4688                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4689                 t3 = gen_reg_rtx (compute_mode);
4690                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4691                                       compute_mode, 1, 1);
4692                 if (t3 == 0)
4693                   {
4694                     rtx_code_label *lab;
4695                     lab = gen_label_rtx ();
4696                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4697                     expand_inc (t1, const1_rtx);
4698                     emit_label (lab);
4699                     quotient = t1;
4700                   }
4701                 else
4702                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4703                                                           t1, t3),
4704                                             tquotient);
4705                 break;
4706               }
4707
4708             /* Try using an instruction that produces both the quotient and
4709                remainder, using truncation.  We can easily compensate the
4710                quotient or remainder to get ceiling rounding, once we have the
4711                remainder.  Notice that we compute also the final remainder
4712                value here, and return the result right away.  */
4713             if (target == 0 || GET_MODE (target) != compute_mode)
4714               target = gen_reg_rtx (compute_mode);
4715             if (rem_flag)
4716               {
4717                 remainder= (REG_P (target)
4718                             ? target : gen_reg_rtx (compute_mode));
4719                 quotient = gen_reg_rtx (compute_mode);
4720               }
4721             else
4722               {
4723                 quotient = (REG_P (target)
4724                             ? target : gen_reg_rtx (compute_mode));
4725                 remainder = gen_reg_rtx (compute_mode);
4726               }
4727
4728             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4729                                      remainder, 0))
4730               {
4731                 /* This could be computed with a branch-less sequence.
4732                    Save that for later.  */
4733                 rtx tem;
4734                 rtx_code_label *label = gen_label_rtx ();
4735                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4736                                  compute_mode, label);
4737                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4738                                     NULL_RTX, 0, OPTAB_WIDEN);
4739                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4740                 expand_inc (quotient, const1_rtx);
4741                 expand_dec (remainder, op1);
4742                 emit_label (label);
4743                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4744               }
4745
4746             /* No luck with division elimination or divmod.  Have to do it
4747                by conditionally adjusting op0 *and* the result.  */
4748             {
4749               rtx_code_label *label1, *label2, *label3, *label4, *label5;
4750               rtx adjusted_op0;
4751               rtx tem;
4752
4753               quotient = gen_reg_rtx (compute_mode);
4754               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4755               label1 = gen_label_rtx ();
4756               label2 = gen_label_rtx ();
4757               label3 = gen_label_rtx ();
4758               label4 = gen_label_rtx ();
4759               label5 = gen_label_rtx ();
4760               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4761               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4762                                compute_mode, label1);
4763               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4764                                   quotient, 0, OPTAB_LIB_WIDEN);
4765               if (tem != quotient)
4766                 emit_move_insn (quotient, tem);
4767               emit_jump_insn (gen_jump (label5));
4768               emit_barrier ();
4769               emit_label (label1);
4770               expand_dec (adjusted_op0, const1_rtx);
4771               emit_jump_insn (gen_jump (label4));
4772               emit_barrier ();
4773               emit_label (label2);
4774               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4775                                compute_mode, label3);
4776               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4777                                   quotient, 0, OPTAB_LIB_WIDEN);
4778               if (tem != quotient)
4779                 emit_move_insn (quotient, tem);
4780               emit_jump_insn (gen_jump (label5));
4781               emit_barrier ();
4782               emit_label (label3);
4783               expand_inc (adjusted_op0, const1_rtx);
4784               emit_label (label4);
4785               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4786                                   quotient, 0, OPTAB_LIB_WIDEN);
4787               if (tem != quotient)
4788                 emit_move_insn (quotient, tem);
4789               expand_inc (quotient, const1_rtx);
4790               emit_label (label5);
4791             }
4792           }
4793         break;
4794
4795       case EXACT_DIV_EXPR:
4796         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4797           {
4798             HOST_WIDE_INT d = INTVAL (op1);
4799             unsigned HOST_WIDE_INT ml;
4800             int pre_shift;
4801             rtx t1;
4802
4803             pre_shift = floor_log2 (d & -d);
4804             ml = invert_mod2n (d >> pre_shift, size);
4805             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4806                                pre_shift, NULL_RTX, unsignedp);
4807             quotient = expand_mult (compute_mode, t1,
4808                                     gen_int_mode (ml, compute_mode),
4809                                     NULL_RTX, 1);
4810
4811             insn = get_last_insn ();
4812             set_dst_reg_note (insn, REG_EQUAL,
4813                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4814                                               compute_mode, op0, op1),
4815                               quotient);
4816           }
4817         break;
4818
4819       case ROUND_DIV_EXPR:
4820       case ROUND_MOD_EXPR:
4821         if (unsignedp)
4822           {
4823             rtx tem;
4824             rtx_code_label *label;
4825             label = gen_label_rtx ();
4826             quotient = gen_reg_rtx (compute_mode);
4827             remainder = gen_reg_rtx (compute_mode);
4828             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4829               {
4830                 rtx tem;
4831                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4832                                          quotient, 1, OPTAB_LIB_WIDEN);
4833                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4834                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4835                                           remainder, 1, OPTAB_LIB_WIDEN);
4836               }
4837             tem = plus_constant (compute_mode, op1, -1);
4838             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4839             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4840             expand_inc (quotient, const1_rtx);
4841             expand_dec (remainder, op1);
4842             emit_label (label);
4843           }
4844         else
4845           {
4846             rtx abs_rem, abs_op1, tem, mask;
4847             rtx_code_label *label;
4848             label = gen_label_rtx ();
4849             quotient = gen_reg_rtx (compute_mode);
4850             remainder = gen_reg_rtx (compute_mode);
4851             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4852               {
4853                 rtx tem;
4854                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4855                                          quotient, 0, OPTAB_LIB_WIDEN);
4856                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4857                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4858                                           remainder, 0, OPTAB_LIB_WIDEN);
4859               }
4860             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4861             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4862             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4863                                 1, NULL_RTX, 1);
4864             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4865             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4866                                 NULL_RTX, 0, OPTAB_WIDEN);
4867             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4868                                  size - 1, NULL_RTX, 0);
4869             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4870                                 NULL_RTX, 0, OPTAB_WIDEN);
4871             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4872                                 NULL_RTX, 0, OPTAB_WIDEN);
4873             expand_inc (quotient, tem);
4874             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4875                                 NULL_RTX, 0, OPTAB_WIDEN);
4876             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4877                                 NULL_RTX, 0, OPTAB_WIDEN);
4878             expand_dec (remainder, tem);
4879             emit_label (label);
4880           }
4881         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4882
4883       default:
4884         gcc_unreachable ();
4885       }
4886
4887   if (quotient == 0)
4888     {
4889       if (target && GET_MODE (target) != compute_mode)
4890         target = 0;
4891
4892       if (rem_flag)
4893         {
4894           /* Try to produce the remainder without producing the quotient.
4895              If we seem to have a divmod pattern that does not require widening,
4896              don't try widening here.  We should really have a WIDEN argument
4897              to expand_twoval_binop, since what we'd really like to do here is
4898              1) try a mod insn in compute_mode
4899              2) try a divmod insn in compute_mode
4900              3) try a div insn in compute_mode and multiply-subtract to get
4901                 remainder
4902              4) try the same things with widening allowed.  */
4903           remainder
4904             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4905                                  op0, op1, target,
4906                                  unsignedp,
4907                                  ((optab_handler (optab2, compute_mode)
4908                                    != CODE_FOR_nothing)
4909                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4910           if (remainder == 0)
4911             {
4912               /* No luck there.  Can we do remainder and divide at once
4913                  without a library call?  */
4914               remainder = gen_reg_rtx (compute_mode);
4915               if (! expand_twoval_binop ((unsignedp
4916                                           ? udivmod_optab
4917                                           : sdivmod_optab),
4918                                          op0, op1,
4919                                          NULL_RTX, remainder, unsignedp))
4920                 remainder = 0;
4921             }
4922
4923           if (remainder)
4924             return gen_lowpart (mode, remainder);
4925         }
4926
4927       /* Produce the quotient.  Try a quotient insn, but not a library call.
4928          If we have a divmod in this mode, use it in preference to widening
4929          the div (for this test we assume it will not fail). Note that optab2
4930          is set to the one of the two optabs that the call below will use.  */
4931       quotient
4932         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4933                              op0, op1, rem_flag ? NULL_RTX : target,
4934                              unsignedp,
4935                              ((optab_handler (optab2, compute_mode)
4936                                != CODE_FOR_nothing)
4937                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4938
4939       if (quotient == 0)
4940         {
4941           /* No luck there.  Try a quotient-and-remainder insn,
4942              keeping the quotient alone.  */
4943           quotient = gen_reg_rtx (compute_mode);
4944           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4945                                      op0, op1,
4946                                      quotient, NULL_RTX, unsignedp))
4947             {
4948               quotient = 0;
4949               if (! rem_flag)
4950                 /* Still no luck.  If we are not computing the remainder,
4951                    use a library call for the quotient.  */
4952                 quotient = sign_expand_binop (compute_mode,
4953                                               udiv_optab, sdiv_optab,
4954                                               op0, op1, target,
4955                                               unsignedp, OPTAB_LIB_WIDEN);
4956             }
4957         }
4958     }
4959
4960   if (rem_flag)
4961     {
4962       if (target && GET_MODE (target) != compute_mode)
4963         target = 0;
4964
4965       if (quotient == 0)
4966         {
4967           /* No divide instruction either.  Use library for remainder.  */
4968           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4969                                          op0, op1, target,
4970                                          unsignedp, OPTAB_LIB_WIDEN);
4971           /* No remainder function.  Try a quotient-and-remainder
4972              function, keeping the remainder.  */
4973           if (!remainder)
4974             {
4975               remainder = gen_reg_rtx (compute_mode);
4976               if (!expand_twoval_binop_libfunc
4977                   (unsignedp ? udivmod_optab : sdivmod_optab,
4978                    op0, op1,
4979                    NULL_RTX, remainder,
4980                    unsignedp ? UMOD : MOD))
4981                 remainder = NULL_RTX;
4982             }
4983         }
4984       else
4985         {
4986           /* We divided.  Now finish doing X - Y * (X / Y).  */
4987           remainder = expand_mult (compute_mode, quotient, op1,
4988                                    NULL_RTX, unsignedp);
4989           remainder = expand_binop (compute_mode, sub_optab, op0,
4990                                     remainder, target, unsignedp,
4991                                     OPTAB_LIB_WIDEN);
4992         }
4993     }
4994
4995   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4996 }
4997 \f
4998 /* Return a tree node with data type TYPE, describing the value of X.
4999    Usually this is a VAR_DECL, if there is no obvious better choice.
5000    X may be an expression, however we only support those expressions
5001    generated by loop.c.

        Integer constants are converted with wide_int_to_tree and real
        constants with build_real; a CONST_VECTOR is rebuilt element by
        element.  PLUS, MINUS, NEG, MULT, the shifts, DIV, UDIV and the
        extensions are rebuilt as tree expressions by calling make_tree
        recursively on their operands.  Any other rtx, including a
        SYMBOL_REF for which SYMBOL_REF_DECL is null, is wrapped in a new
        VAR_DECL with no name whose rtl field is set to X.  */
5002
5003 tree
5004 make_tree (tree type, rtx x)
5005 {
5006   tree t;
5007
5008   switch (GET_CODE (x))
5009     {
5010     case CONST_INT:
5011     case CONST_WIDE_INT:
           /* X is paired with the mode of TYPE for the conversion to a
              tree constant.  */
5012       t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
5013       return t;
5014
5015     case CONST_DOUBLE:
5016       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
           /* When TARGET_SUPPORTS_WIDE_INT is 0, a VOIDmode CONST_DOUBLE is
              read as an integer made of the two HOST_WIDE_INTs starting at
              CONST_DOUBLE_LOW.  Everything else is read as a real value.  */
5017       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5018         t = wide_int_to_tree (type,
5019                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5020                                                     HOST_BITS_PER_WIDE_INT * 2));
5021       else
5022         {
5023           REAL_VALUE_TYPE d;
5024
5025           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5026           t = build_real (type, d);
5027         }
5028
5029       return t;
5030
5031     case CONST_VECTOR:
5032       {
5033         int units = CONST_VECTOR_NUNITS (x);
5034         tree itype = TREE_TYPE (type);
5035         tree *elts;
5036         int i;
5037
5038         /* Build a tree with vector elements.  Each element is converted
                with ITYPE, i.e. TREE_TYPE (TYPE), as its type; ELTS is filled
                from the last index down to 0.  */
5039         elts = XALLOCAVEC (tree, units);
5040         for (i = units - 1; i >= 0; --i)
5041           {
5042             rtx elt = CONST_VECTOR_ELT (x, i);
5043             elts[i] = make_tree (itype, elt);
5044           }
5045
5046         return build_vector (type, elts);
5047       }
5048
           /* The following arithmetic codes map directly onto a tree code,
              with both the operands and the result built in TYPE.  */
5049     case PLUS:
5050       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5051                           make_tree (type, XEXP (x, 1)));
5052
5053     case MINUS:
5054       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5055                           make_tree (type, XEXP (x, 1)));
5056
5057     case NEG:
5058       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5059
5060     case MULT:
5061       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5062                           make_tree (type, XEXP (x, 1)));
5063
5064     case ASHIFT:
5065       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5066                           make_tree (type, XEXP (x, 1)));
5067
5068     case LSHIFTRT:
           /* Both right shifts use RSHIFT_EXPR.  Here the shifted operand and
              the shift itself are built in the unsigned variant of TYPE, and
              the result is then converted back to TYPE.  The shift count is
              still built in TYPE.  */
5069       t = unsigned_type_for (type);
5070       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5071                                          make_tree (t, XEXP (x, 0)),
5072                                          make_tree (type, XEXP (x, 1))));
5073
5074     case ASHIFTRT:
           /* As for LSHIFTRT, but using the signed variant of TYPE.  */
5075       t = signed_type_for (type);
5076       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5077                                          make_tree (t, XEXP (x, 0)),
5078                                          make_tree (type, XEXP (x, 1))));
5079
5080     case DIV:
           /* Divide in the signed variant of TYPE, unless TYPE is a
              REAL_TYPE, in which case TYPE itself is used.  */
5081       if (TREE_CODE (type) != REAL_TYPE)
5082         t = signed_type_for (type);
5083       else
5084         t = type;
5085
5086       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5087                                          make_tree (t, XEXP (x, 0)),
5088                                          make_tree (t, XEXP (x, 1))));
5089     case UDIV:
           /* Divide in the unsigned variant of TYPE.  */
5090       t = unsigned_type_for (type);
5091       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5092                                          make_tree (t, XEXP (x, 0)),
5093                                          make_tree (t, XEXP (x, 1))));
5094
5095     case SIGN_EXTEND:
5096     case ZERO_EXTEND:
           /* Ask the language hook for a type matching the mode of the
              operand (the second argument is nonzero for ZERO_EXTEND), build
              the operand in that type and convert it to TYPE.  */
5097       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5098                                           GET_CODE (x) == ZERO_EXTEND);
5099       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5100
5101     case CONST:
           /* Look through the CONST wrapper.  */
5102       return make_tree (type, XEXP (x, 0));
5103
5104     case SYMBOL_REF:
           /* If the symbol has an associated decl, return the address of
              that decl converted to TYPE.  */
5105       t = SYMBOL_REF_DECL (x);
5106       if (t)
5107         return fold_convert (type, build_fold_addr_expr (t));
5108       /* else fall through.  */
5109
5110     default:
           /* Wrap X in a nameless VAR_DECL of type TYPE, located at
              RTL_LOCATION (X).  */
5111       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5112
5113       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5114          address mode to pointer mode.  */
5115       if (POINTER_TYPE_P (type))
5116         x = convert_memory_address_addr_space
5117               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5118
5119       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5120          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5121       t->decl_with_rtl.rtl = x;
5122
5123       return t;
5124     }
5125 }
5126 \f
5127 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5128    and returning TARGET.  The operation is carried out in MODE.
5129
5130    If TARGET is 0, a pseudo-register or constant is returned.  */
5131
5132 rtx
5133 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5134 {
5135   rtx tem = 0;
5136
       /* When both operands have VOIDmode, first try to simplify the AND
          without emitting any insn.  */
5137   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5138     tem = simplify_binary_operation (AND, mode, op0, op1);
       /* Otherwise, or if that produced nothing, expand the AND through
          and_optab with OPTAB_LIB_WIDEN, suggesting TARGET as the
          destination.  */
5139   if (tem == 0)
5140     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5141
       /* With no TARGET, hand back whatever holds the result.  Otherwise
          copy the result into TARGET unless it is already there.  */
5142   if (target == 0)
5143     target = tem;
5144   else if (tem != target)
5145     emit_move_insn (target, tem);
5146   return target;
5147 }
5148
5149 /* Helper function for emit_store_flag.

        Emit the store-flag insn ICODE for the comparison X CODE Y.  X and Y
        are first passed through prepare_operand with MODE, COMPARE_MODE and
        UNSIGNEDP.  The insn produces its result in the mode returned by
        targetm.cstore_mode (ICODE); the value is then converted to
        TARGET_MODE (or left in that result mode if TARGET_MODE is VOIDmode)
        and normalized according to NORMALIZEP and STORE_FLAG_VALUE.  If
        TARGET is null a new pseudo of TARGET_MODE is created.

        Return the rtx holding the result, or NULL_RTX if the operands could
        not be prepared or the insn could not be expanded; in that case any
        insns emitted here have been deleted again.  */
5150 rtx
5151 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5152              machine_mode mode, machine_mode compare_mode,
5153              int unsignedp, rtx x, rtx y, int normalizep,
5154              machine_mode target_mode)
5155 {
5156   struct expand_operand ops[4];
5157   rtx op0, comparison, subtarget;
5158   rtx_insn *last;
5159   machine_mode result_mode = targetm.cstore_mode (icode);
5160
       /* Remember the current last insn so that a failed attempt can be
          undone with delete_insns_since.  */
5161   last = get_last_insn ();
5162   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5163   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5164   if (!x || !y)
5165     {
5166       delete_insns_since (last);
5167       return NULL_RTX;
5168     }
5169
5170   if (target_mode == VOIDmode)
5171     target_mode = result_mode;
5172   if (!target)
5173     target = gen_reg_rtx (target_mode);
5174
5175   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5176
       /* Operand 0 is the output, operand 1 the comparison rtx and operands
          2 and 3 its two arguments.  TARGET is only suggested as the output
          when not optimizing.  */
5177   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5178   create_fixed_operand (&ops[1], comparison);
5179   create_fixed_operand (&ops[2], x);
5180   create_fixed_operand (&ops[3], y);
5181   if (!maybe_expand_insn (icode, 4, ops))
5182     {
5183       delete_insns_since (last);
5184       return NULL_RTX;
5185     }
5186   subtarget = ops[0].value;
5187
5188   /* If we are converting to a wider mode, first convert to
5189      TARGET_MODE, then normalize.  This produces better combining
5190      opportunities on machines that have a SIGN_EXTRACT when we are
5191      testing a single bit.  This mostly benefits the 68k.
5192
5193      If STORE_FLAG_VALUE does not have the sign bit set when
5194      interpreted in RESULT_MODE, we can do this conversion as unsigned,
          which is usually more efficient.  */
5195
5196   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5197     {
5198       convert_move (target, subtarget,
5199                     val_signbit_known_clear_p (result_mode,
5200                                                STORE_FLAG_VALUE));
5201       op0 = target;
5202       result_mode = target_mode;
5203     }
5204   else
5205     op0 = subtarget;
5206
5207   /* If we want to keep subexpressions around, don't reuse our last
5208      target.  */
5209   if (optimize)
5210     subtarget = 0;
5211
5212   /* Now normalize to the proper value in RESULT_MODE.  Sometimes we
5213      don't have to do anything.  */
5214   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5215     ;
5216   /* STORE_FLAG_VALUE might be the most negative number, so write
5217      the comparison this way to avoid a compiler-time warning.
          In this case negating the result gives the requested value.  */
5218   else if (- normalizep == STORE_FLAG_VALUE)
5219     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5220
5221   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5222      it hard to use a value of just the sign bit due to ANSI integer
5223      constant typing rules.
          Here the sign bit is shifted down to bit 0; the final argument
          asks for an unsigned shift when NORMALIZEP is 1.  */
5224   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5225     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5226                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5227                         normalizep == 1);
5228   else
5229     {
           /* Otherwise STORE_FLAG_VALUE must have its low bit set: mask the
              result down to that bit, and negate it if -1 was requested.  */
5230       gcc_assert (STORE_FLAG_VALUE & 1);
5231
5232       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5233       if (normalizep == -1)
5234         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5235     }
5236
5237   /* If we were converting to a smaller mode, do the conversion now.
          (In the widening case above RESULT_MODE was set to TARGET_MODE, so
          the modes can only differ here otherwise.)  */
5238   if (target_mode != result_mode)
5239     {
5240       convert_move (target, op0, 0);
5241       return target;
5242     }
5243   else
5244     return op0;
5245 }
5246
5247
5248 /* A subroutine of emit_store_flag only including "tricks" that do not
5249    need a recursive call.  These are kept separate to avoid infinite
5250    loops.  */
5251
5252 static rtx
5253 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5254                    machine_mode mode, int unsignedp, int normalizep,
5255                    machine_mode target_mode)
5256 {
5257   rtx subtarget;
5258   enum insn_code icode;
5259   machine_mode compare_mode;
5260   enum mode_class mclass;
5261   enum rtx_code scode;
5262
5263   if (unsignedp)
5264     code = unsigned_condition (code);
5265   scode = swap_condition (code);
5266
5267   /* If one operand is constant, make it the second one.  Only do this
5268      if the other operand is not constant as well.  */
5269
5270   if (swap_commutative_operands_p (op0, op1))
5271     {
5272       std::swap (op0, op1);
5273       code = swap_condition (code);
5274     }
5275
5276   if (mode == VOIDmode)
5277     mode = GET_MODE (op0);
5278
5279   /* For some comparisons with 1 and -1, we can convert this to
5280      comparisons with zero.  This will often produce more opportunities for
5281      store-flag insns.  */
5282
5283   switch (code)
5284     {
5285     case LT:
5286       if (op1 == const1_rtx)
5287         op1 = const0_rtx, code = LE;
5288       break;
5289     case LE:
5290       if (op1 == constm1_rtx)
5291         op1 = const0_rtx, code = LT;
5292       break;
5293     case GE:
5294       if (op1 == const1_rtx)
5295         op1 = const0_rtx, code = GT;
5296       break;
5297     case GT:
5298       if (op1 == constm1_rtx)
5299         op1 = const0_rtx, code = GE;
5300       break;
5301     case GEU:
5302       if (op1 == const1_rtx)
5303         op1 = const0_rtx, code = NE;
5304       break;
5305     case LTU:
5306       if (op1 == const1_rtx)
5307         op1 = const0_rtx, code = EQ;
5308       break;
5309     default:
5310       break;
5311     }
5312
5313   /* If we are comparing a double-word integer with zero or -1, we can
5314      convert the comparison into one involving a single word.  */
5315   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5316       && GET_MODE_CLASS (mode) == MODE_INT
5317       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5318     {
5319       rtx tem;
5320       if ((code == EQ || code == NE)
5321           && (op1 == const0_rtx || op1 == constm1_rtx))
5322         {
5323           rtx op00, op01;
5324
5325           /* Do a logical OR or AND of the two words and compare the
5326              result.  */
5327           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5328           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5329           tem = expand_binop (word_mode,
5330                               op1 == const0_rtx ? ior_optab : and_optab,
5331                               op00, op01, NULL_RTX, unsignedp,
5332                               OPTAB_DIRECT);
5333
5334           if (tem != 0)
5335             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5336                                    unsignedp, normalizep);
5337         }
5338       else if ((code == LT || code == GE) && op1 == const0_rtx)
5339         {
5340           rtx op0h;
5341
5342           /* If testing the sign bit, can just test on high word.  */
5343           op0h = simplify_gen_subreg (word_mode, op0, mode,
5344                                       subreg_highpart_offset (word_mode,
5345                                                               mode));
5346           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5347                                  unsignedp, normalizep);
5348         }
5349       else
5350         tem = NULL_RTX;
5351
5352       if (tem)
5353         {
5354           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5355             return tem;
5356           if (!target)
5357             target = gen_reg_rtx (target_mode);
5358
5359           convert_move (target, tem,
5360                         !val_signbit_known_set_p (word_mode,
5361                                                   (normalizep ? normalizep
5362                                                    : STORE_FLAG_VALUE)));
5363           return target;
5364         }
5365     }
5366
5367   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5368      complement of A (for GE) and shifting the sign bit to the low bit.  */
5369   if (op1 == const0_rtx && (code == LT || code == GE)
5370       && GET_MODE_CLASS (mode) == MODE_INT
5371       && (normalizep || STORE_FLAG_VALUE == 1
5372           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5373     {
5374       subtarget = target;
5375
5376       if (!target)
5377         target_mode = mode;
5378
5379       /* If the result is to be wider than OP0, it is best to convert it
5380          first.  If it is to be narrower, it is *incorrect* to convert it
5381          first.  */
5382       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5383         {
5384           op0 = convert_modes (target_mode, mode, op0, 0);
5385           mode = target_mode;
5386         }
5387
5388       if (target_mode != mode)
5389         subtarget = 0;
5390
5391       if (code == GE)
5392         op0 = expand_unop (mode, one_cmpl_optab, op0,
5393                            ((STORE_FLAG_VALUE == 1 || normalizep)
5394                             ? 0 : subtarget), 0);
5395
5396       if (STORE_FLAG_VALUE == 1 || normalizep)
5397         /* If we are supposed to produce a 0/1 value, we want to do
5398            a logical shift from the sign bit to the low-order bit; for
5399            a -1/0 value, we do an arithmetic shift.  */
5400         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5401                             GET_MODE_BITSIZE (mode) - 1,
5402                             subtarget, normalizep != -1);
5403
5404       if (mode != target_mode)
5405         op0 = convert_modes (target_mode, mode, op0, 0);
5406
5407       return op0;
5408     }
5409
5410   mclass = GET_MODE_CLASS (mode);
5411   for (compare_mode = mode; compare_mode != VOIDmode;
5412        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5413     {
5414      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5415      icode = optab_handler (cstore_optab, optab_mode);
5416      if (icode != CODE_FOR_nothing)
5417         {
5418           do_pending_stack_adjust ();
5419           rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5420                                  unsignedp, op0, op1, normalizep, target_mode);
5421           if (tem)
5422             return tem;
5423
5424           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5425             {
5426               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5427                                  unsignedp, op1, op0, normalizep, target_mode);
5428               if (tem)
5429                 return tem;
5430             }
5431           break;
5432         }
5433     }
5434
5435   return 0;
5436 }
5437
5438 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5439    and storing in TARGET.  Normally return TARGET.
5440    Return 0 if that cannot be done.
5441
5442    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5443    it is VOIDmode, they cannot both be CONST_INT.
5444
5445    UNSIGNEDP is for the case where we have to widen the operands
5446    to perform the operation.  It says to use zero-extension.
5447
5448    NORMALIZEP is 1 if we should convert the result to be either zero
5449    or one.  Normalize is -1 if we should convert the result to be
5450    either zero or -1.  If NORMALIZEP is zero, the result will be left
5451    "raw" out of the scc insn.  */
5452
5453 rtx
5454 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5455                  machine_mode mode, int unsignedp, int normalizep)
5456 {
5457   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5458   enum rtx_code rcode;
5459   rtx subtarget;
5460   rtx tem, trueval;
5461   rtx_insn *last;
5462
5463   /* If we compare constants, we shouldn't use a store-flag operation,
5464      but a constant load.  We can get there via the vanilla route that
5465      usually generates a compare-branch sequence, but will in this case
5466      fold the comparison to a constant, and thus elide the branch.  */
5467   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5468     return NULL_RTX;
5469
5470   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5471                            target_mode);
5472   if (tem)
5473     return tem;
5474
5475   /* If we reached here, we can't do this with a scc insn, however there
5476      are some comparisons that can be done in other ways.  Don't do any
5477      of these cases if branches are very cheap.  */
5478   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5479     return 0;
5480
5481   /* See what we need to return.  We can only return a 1, -1, or the
5482      sign bit.  */
5483
5484   if (normalizep == 0)
5485     {
5486       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5487         normalizep = STORE_FLAG_VALUE;
5488
5489       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5490         ;
5491       else
5492         return 0;
5493     }
5494
5495   last = get_last_insn ();
5496
5497   /* If optimizing, use different pseudo registers for each insn, instead
5498      of reusing the same pseudo.  This leads to better CSE, but slows
5499      down the compiler, since there are more pseudos */
5500   subtarget = (!optimize
5501                && (target_mode == mode)) ? target : NULL_RTX;
5502   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5503
5504   /* For floating-point comparisons, try the reverse comparison or try
5505      changing the "orderedness" of the comparison.  */
5506   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5507     {
5508       enum rtx_code first_code;
5509       bool and_them;
5510
5511       rcode = reverse_condition_maybe_unordered (code);
5512       if (can_compare_p (rcode, mode, ccp_store_flag)
5513           && (code == ORDERED || code == UNORDERED
5514               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5515               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5516         {
5517           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5518                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5519
5520           /* For the reverse comparison, use either an addition or a XOR.  */
5521           if (want_add
5522               && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5523                            optimize_insn_for_speed_p ()) == 0)
5524             {
5525               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5526                                        STORE_FLAG_VALUE, target_mode);
5527               if (tem)
5528                 return expand_binop (target_mode, add_optab, tem,
5529                                      gen_int_mode (normalizep, target_mode),
5530                                      target, 0, OPTAB_WIDEN);
5531             }
5532           else if (!want_add
5533                    && rtx_cost (trueval, XOR, 1,
5534                                 optimize_insn_for_speed_p ()) == 0)
5535             {
5536               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5537                                        normalizep, target_mode);
5538               if (tem)
5539                 return expand_binop (target_mode, xor_optab, tem, trueval,
5540                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5541             }
5542         }
5543
5544       delete_insns_since (last);
5545
5546       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5547       if (code == ORDERED || code == UNORDERED)
5548         return 0;
5549
5550       and_them = split_comparison (code, mode, &first_code, &code);
5551
5552       /* If there are no NaNs, the first comparison should always fall through.
5553          Effectively change the comparison to the other one.  */
5554       if (!HONOR_NANS (mode))
5555         {
5556           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5557           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5558                                     target_mode);
5559         }
5560
5561       if (!HAVE_conditional_move)
5562         return 0;
5563
5564       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5565          conditional move.  */
5566       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5567                                normalizep, target_mode);
5568       if (tem == 0)
5569         return 0;
5570
5571       if (and_them)
5572         tem = emit_conditional_move (target, code, op0, op1, mode,
5573                                      tem, const0_rtx, GET_MODE (tem), 0);
5574       else
5575         tem = emit_conditional_move (target, code, op0, op1, mode,
5576                                      trueval, tem, GET_MODE (tem), 0);
5577
5578       if (tem == 0)
5579         delete_insns_since (last);
5580       return tem;
5581     }
5582
5583   /* The remaining tricks only apply to integer comparisons.  */
5584
5585   if (GET_MODE_CLASS (mode) != MODE_INT)
5586     return 0;
5587
5588   /* If this is an equality comparison of integers, we can try to exclusive-or
5589      (or subtract) the two operands and use a recursive call to try the
5590      comparison with zero.  Don't do any of these cases if branches are
5591      very cheap.  */
5592
5593   if ((code == EQ || code == NE) && op1 != const0_rtx)
5594     {
5595       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5596                           OPTAB_WIDEN);
5597
5598       if (tem == 0)
5599         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5600                             OPTAB_WIDEN);
5601       if (tem != 0)
5602         tem = emit_store_flag (target, code, tem, const0_rtx,
5603                                mode, unsignedp, normalizep);
5604       if (tem != 0)
5605         return tem;
5606
5607       delete_insns_since (last);
5608     }
5609
5610   /* For integer comparisons, try the reverse comparison.  However, for
5611      small X and if we'd have anyway to extend, implementing "X != 0"
5612      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5613   rcode = reverse_condition (code);
5614   if (can_compare_p (rcode, mode, ccp_store_flag)
5615       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5616             && code == NE
5617             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5618             && op1 == const0_rtx))
5619     {
5620       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5621                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5622
5623       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5624       if (want_add
5625           && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5626                        optimize_insn_for_speed_p ()) == 0)
5627         {
5628           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5629                                    STORE_FLAG_VALUE, target_mode);
5630           if (tem != 0)
5631             tem = expand_binop (target_mode, add_optab, tem,
5632                                 gen_int_mode (normalizep, target_mode),
5633                                 target, 0, OPTAB_WIDEN);
5634         }
5635       else if (!want_add
5636                && rtx_cost (trueval, XOR, 1,
5637                             optimize_insn_for_speed_p ()) == 0)
5638         {
5639           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5640                                    normalizep, target_mode);
5641           if (tem != 0)
5642             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5643                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5644         }
5645
5646       if (tem != 0)
5647         return tem;
5648       delete_insns_since (last);
5649     }
5650
5651   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5652      the constant zero.  Reject all other comparisons at this point.  Only
5653      do LE and GT if branches are expensive since they are expensive on
5654      2-operand machines.  */
5655
5656   if (op1 != const0_rtx
5657       || (code != EQ && code != NE
5658           && (BRANCH_COST (optimize_insn_for_speed_p (),
5659                            false) <= 1 || (code != LE && code != GT))))
5660     return 0;
5661
5662   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5663      do the necessary operation below.  */
5664
5665   tem = 0;
5666
5667   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5668      the sign bit set.  */
5669
5670   if (code == LE)
5671     {
5672       /* This is destructive, so SUBTARGET can't be OP0.  */
5673       if (rtx_equal_p (subtarget, op0))
5674         subtarget = 0;
5675
5676       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5677                           OPTAB_WIDEN);
5678       if (tem)
5679         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5680                             OPTAB_WIDEN);
5681     }
5682
5683   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5684      number of bits in the mode of OP0, minus one.  */
5685
5686   if (code == GT)
5687     {
5688       if (rtx_equal_p (subtarget, op0))
5689         subtarget = 0;
5690
5691       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5692                           GET_MODE_BITSIZE (mode) - 1,
5693                           subtarget, 0);
5694       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5695                           OPTAB_WIDEN);
5696     }
5697
5698   if (code == EQ || code == NE)
5699     {
5700       /* For EQ or NE, one way to do the comparison is to apply an operation
5701          that converts the operand into a positive number if it is nonzero
5702          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5703          for NE we negate.  This puts the result in the sign bit.  Then we
5704          normalize with a shift, if needed.
5705
5706          Two operations that can do the above actions are ABS and FFS, so try
5707          them.  If that doesn't work, and MODE is smaller than a full word,
5708          we can use zero-extension to the wider mode (an unsigned conversion)
5709          as the operation.  */
5710
5711       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5712          that is compensated by the subsequent overflow when subtracting
5713          one / negating.  */
5714
5715       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5716         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5717       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5718         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5719       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5720         {
5721           tem = convert_modes (word_mode, mode, op0, 1);
5722           mode = word_mode;
5723         }
5724
5725       if (tem != 0)
5726         {
5727           if (code == EQ)
5728             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5729                                 0, OPTAB_WIDEN);
5730           else
5731             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5732         }
5733
5734       /* If we couldn't do it that way, for NE we can "or" the two's complement
5735          of the value with itself.  For EQ, we take the one's complement of
5736          that "or", which is an extra insn, so we only handle EQ if branches
5737          are expensive.  */
5738
5739       if (tem == 0
5740           && (code == NE
5741               || BRANCH_COST (optimize_insn_for_speed_p (),
5742                               false) > 1))
5743         {
5744           if (rtx_equal_p (subtarget, op0))
5745             subtarget = 0;
5746
5747           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5748           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5749                               OPTAB_WIDEN);
5750
5751           if (tem && code == EQ)
5752             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5753         }
5754     }
5755
5756   if (tem && normalizep)
5757     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5758                         GET_MODE_BITSIZE (mode) - 1,
5759                         subtarget, normalizep == 1);
5760
5761   if (tem)
5762     {
5763       if (!target)
5764         ;
5765       else if (GET_MODE (tem) != target_mode)
5766         {
5767           convert_move (target, tem, 0);
5768           tem = target;
5769         }
5770       else if (!subtarget)
5771         {
5772           emit_move_insn (target, tem);
5773           tem = target;
5774         }
5775     }
5776   else
5777     delete_insns_since (last);
5778
5779   return tem;
5780 }
5781
5782 /* Like emit_store_flag, but always succeeds.  */
5783
5784 rtx
5785 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5786                        machine_mode mode, int unsignedp, int normalizep)
5787 {
5788   rtx tem;
5789   rtx_code_label *label;
5790   rtx trueval, falseval;
5791
5792   /* First see if emit_store_flag can do the job.  */
5793   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5794   if (tem != 0)
5795     return tem;
5796
5797   if (!target)
5798     target = gen_reg_rtx (word_mode);
5799
5800   /* If this failed, we have to do this with set/compare/jump/set code.
5801      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5802   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5803   if (code == NE
5804       && GET_MODE_CLASS (mode) == MODE_INT
5805       && REG_P (target)
5806       && op0 == target
5807       && op1 == const0_rtx)
5808     {
5809       label = gen_label_rtx ();
5810       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5811                                NULL_RTX, NULL, label, -1);
5812       emit_move_insn (target, trueval);
5813       emit_label (label);
5814       return target;
5815     }
5816
5817   if (!REG_P (target)
5818       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5819     target = gen_reg_rtx (GET_MODE (target));
5820
5821   /* Jump in the right direction if the target cannot implement CODE
5822      but can jump on its reverse condition.  */
5823   falseval = const0_rtx;
5824   if (! can_compare_p (code, mode, ccp_jump)
5825       && (! FLOAT_MODE_P (mode)
5826           || code == ORDERED || code == UNORDERED
5827           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5828           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5829     {
5830       enum rtx_code rcode;
5831       if (FLOAT_MODE_P (mode))
5832         rcode = reverse_condition_maybe_unordered (code);
5833       else
5834         rcode = reverse_condition (code);
5835
5836       /* Canonicalize to UNORDERED for the libcall.  */
5837       if (can_compare_p (rcode, mode, ccp_jump)
5838           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5839         {
5840           falseval = trueval;
5841           trueval = const0_rtx;
5842           code = rcode;
5843         }
5844     }
5845
5846   emit_move_insn (target, trueval);
5847   label = gen_label_rtx ();
5848   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5849                            label, -1);
5850
5851   emit_move_insn (target, falseval);
5852   emit_label (label);
5853
5854   return target;
5855 }
5856 \f
5857 /* Perform possibly multi-word comparison and conditional jump to LABEL
5858    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5859    now a thin wrapper around do_compare_rtx_and_jump.  */
5860
5861 static void
5862 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5863                  rtx_code_label *label)
5864 {
5865   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5866   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
5867                            NULL, label, -1);
5868 }