gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2015 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "diagnostic-core.h"
  27 #include "rtl.h"
  28 #include "hash-set.h"
  29 #include "machmode.h"
  30 #include "vec.h"
  31 #include "double-int.h"
  32 #include "input.h"
  33 #include "alias.h"
  34 #include "symtab.h"
  35 #include "wide-int.h"
  36 #include "inchash.h"
  37 #include "tree.h"
  38 #include "fold-const.h"
  39 #include "stor-layout.h"
  40 #include "tm_p.h"
  41 #include "flags.h"
  42 #include "insn-config.h"
  43 #include "hashtab.h"
  44 #include "hard-reg-set.h"
  45 #include "function.h"
  46 #include "statistics.h"
  47 #include "real.h"
  48 #include "fixed-value.h"
  49 #include "expmed.h"
  50 #include "dojump.h"
  51 #include "explow.h"
  52 #include "calls.h"
  53 #include "emit-rtl.h"
  54 #include "varasm.h"
  55 #include "stmt.h"
  56 #include "expr.h"
  57 #include "insn-codes.h"
  58 #include "optabs.h"
  59 #include "recog.h"
  60 #include "langhooks.h"
  61 #include "predict.h"
  62 #include "basic-block.h"
  63 #include "df.h"
  64 #include "target.h"
  65
  66 struct target_expmed default_target_expmed;
  67 #if SWITCHABLE_TARGET
  68 struct target_expmed *this_target_expmed = &default_target_expmed;
  69 #endif
  70
  71 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  72                                    unsigned HOST_WIDE_INT,
  73                                    unsigned HOST_WIDE_INT,
  74                                    unsigned HOST_WIDE_INT,
  75                                    rtx);
  76 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
  77                                      unsigned HOST_WIDE_INT,
  78                                      rtx);
  79 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  80                                    unsigned HOST_WIDE_INT,
  81                                    unsigned HOST_WIDE_INT,
  82                                    unsigned HOST_WIDE_INT,
  83                                    rtx);
  84 static rtx extract_fixed_bit_field (machine_mode, rtx,
  85                                     unsigned HOST_WIDE_INT,
  86                                     unsigned HOST_WIDE_INT, rtx, int);
  87 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
  88                                       unsigned HOST_WIDE_INT,
  89                                       unsigned HOST_WIDE_INT, rtx, int);
  90 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
  91 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  92                                     unsigned HOST_WIDE_INT, int);
  93 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
  94 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  95 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  96
  97 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  98    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  99    The mask is truncated if necessary to the width of mode MODE.  The
 100    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
 101
 102 static inline rtx
 103 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
 104 {
 105   return immed_wide_int_const
 106     (wi::shifted_mask (bitpos, bitsize, complement,
 107                        GET_MODE_PRECISION (mode)), mode);
 108 }
 109
 110 /* Test whether a value is zero of a power of two.  */
 111 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
 112   (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
 113
 114 struct init_expmed_rtl
 115 {
 116   rtx reg;
 117   rtx plus;
 118   rtx neg;
 119   rtx mult;
 120   rtx sdiv;
 121   rtx udiv;
 122   rtx sdiv_32;
 123   rtx smod_32;
 124   rtx wide_mult;
 125   rtx wide_lshr;
 126   rtx wide_trunc;
 127   rtx shift;
 128   rtx shift_mult;
 129   rtx shift_add;
 130   rtx shift_sub0;
 131   rtx shift_sub1;
 132   rtx zext;
 133   rtx trunc;
 134
 135   rtx pow2[MAX_BITS_PER_WORD];
 136   rtx cint[MAX_BITS_PER_WORD];
 137 };
 138
 139 static void
 140 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
 141                       machine_mode from_mode, bool speed)
 142 {
 143   int to_size, from_size;
 144   rtx which;
 145
 146   to_size = GET_MODE_PRECISION (to_mode);
 147   from_size = GET_MODE_PRECISION (from_mode);
 148
 149   /* Most partial integers have a precision less than the "full"
 150      integer it requires for storage.  In case one doesn't, for
 151      comparison purposes here, reduce the bit size by one in that
 152      case.  */
 153   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 154       && exact_log2 (to_size) != -1)
 155     to_size --;
 156   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 157       && exact_log2 (from_size) != -1)
 158     from_size --;
 159
 160   /* Assume cost of zero-extend and sign-extend is the same.  */
 161   which = (to_size < from_size ? all->trunc : all->zext);
 162
 163   PUT_MODE (all->reg, from_mode);
 164   set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
 165 }
 166
 167 static void
 168 init_expmed_one_mode (struct init_expmed_rtl *all,
 169                       machine_mode mode, int speed)
 170 {
 171   int m, n, mode_bitsize;
 172   machine_mode mode_from;
 173
 174   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 175
 176   PUT_MODE (all->reg, mode);
 177   PUT_MODE (all->plus, mode);
 178   PUT_MODE (all->neg, mode);
 179   PUT_MODE (all->mult, mode);
 180   PUT_MODE (all->sdiv, mode);
 181   PUT_MODE (all->udiv, mode);
 182   PUT_MODE (all->sdiv_32, mode);
 183   PUT_MODE (all->smod_32, mode);
 184   PUT_MODE (all->wide_trunc, mode);
 185   PUT_MODE (all->shift, mode);
 186   PUT_MODE (all->shift_mult, mode);
 187   PUT_MODE (all->shift_add, mode);
 188   PUT_MODE (all->shift_sub0, mode);
 189   PUT_MODE (all->shift_sub1, mode);
 190   PUT_MODE (all->zext, mode);
 191   PUT_MODE (all->trunc, mode);
 192
 193   set_add_cost (speed, mode, set_src_cost (all->plus, speed));
 194   set_neg_cost (speed, mode, set_src_cost (all->neg, speed));
 195   set_mul_cost (speed, mode, set_src_cost (all->mult, speed));
 196   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, speed));
 197   set_udiv_cost (speed, mode, set_src_cost (all->udiv, speed));
 198
 199   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, speed)
 200                                      <= 2 * add_cost (speed, mode)));
 201   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, speed)
 202                                      <= 4 * add_cost (speed, mode)));
 203
 204   set_shift_cost (speed, mode, 0, 0);
 205   {
 206     int cost = add_cost (speed, mode);
 207     set_shiftadd_cost (speed, mode, 0, cost);
 208     set_shiftsub0_cost (speed, mode, 0, cost);
 209     set_shiftsub1_cost (speed, mode, 0, cost);
 210   }
 211
 212   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 213   for (m = 1; m < n; m++)
 214     {
 215       XEXP (all->shift, 1) = all->cint[m];
 216       XEXP (all->shift_mult, 1) = all->pow2[m];
 217
 218       set_shift_cost (speed, mode, m, set_src_cost (all->shift, speed));
 219       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, speed));
 220       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, speed));
 221       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, speed));
 222     }
 223
 224   if (SCALAR_INT_MODE_P (mode))
 225     {
 226       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 227            mode_from = (machine_mode)(mode_from + 1))
 228         init_expmed_one_conv (all, mode, mode_from, speed);
 229     }
 230   if (GET_MODE_CLASS (mode) == MODE_INT)
 231     {
 232       machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
 233       if (wider_mode != VOIDmode)
 234         {
 235           PUT_MODE (all->zext, wider_mode);
 236           PUT_MODE (all->wide_mult, wider_mode);
 237           PUT_MODE (all->wide_lshr, wider_mode);
 238           XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
 239
 240           set_mul_widen_cost (speed, wider_mode,
 241                               set_src_cost (all->wide_mult, speed));
 242           set_mul_highpart_cost (speed, mode,
 243                                  set_src_cost (all->wide_trunc, speed));
 244         }
 245     }
 246 }
 247
 248 void
 249 init_expmed (void)
 250 {
 251   struct init_expmed_rtl all;
 252   machine_mode mode = QImode;
 253   int m, speed;
 254
 255   memset (&all, 0, sizeof all);
 256   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 257     {
 258       all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 259       all.cint[m] = GEN_INT (m);
 260     }
 261
 262   /* Avoid using hard regs in ways which may be unsupported.  */
 263   all.reg = gen_rtx_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 264   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 265   all.neg = gen_rtx_NEG (mode, all.reg);
 266   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 267   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 268   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 269   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 270   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 271   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 272   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 273   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 274   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 275   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 276   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 277   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 278   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 279   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 280   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 281
 282   for (speed = 0; speed < 2; speed++)
 283     {
 284       crtl->maybe_hot_insn_p = speed;
 285       set_zero_cost (speed, set_src_cost (const0_rtx, speed));
 286
 287       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 288            mode = (machine_mode)(mode + 1))
 289         init_expmed_one_mode (&all, mode, speed);
 290
 291       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 292         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 293              mode = (machine_mode)(mode + 1))
 294           init_expmed_one_mode (&all, mode, speed);
 295
 296       if (MIN_MODE_VECTOR_INT != VOIDmode)
 297         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 298              mode = (machine_mode)(mode + 1))
 299           init_expmed_one_mode (&all, mode, speed);
 300     }
 301
 302   if (alg_hash_used_p ())
 303     {
 304       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 305       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 306     }
 307   else
 308     set_alg_hash_used_p (true);
 309   default_rtl_profile ();
 310
 311   ggc_free (all.trunc);
 312   ggc_free (all.shift_sub1);
 313   ggc_free (all.shift_sub0);
 314   ggc_free (all.shift_add);
 315   ggc_free (all.shift_mult);
 316   ggc_free (all.shift);
 317   ggc_free (all.wide_trunc);
 318   ggc_free (all.wide_lshr);
 319   ggc_free (all.wide_mult);
 320   ggc_free (all.zext);
 321   ggc_free (all.smod_32);
 322   ggc_free (all.sdiv_32);
 323   ggc_free (all.udiv);
 324   ggc_free (all.sdiv);
 325   ggc_free (all.mult);
 326   ggc_free (all.neg);
 327   ggc_free (all.plus);
 328   ggc_free (all.reg);
 329 }
 330
 331 /* Return an rtx representing minus the value of X.
 332    MODE is the intended mode of the result,
 333    useful if X is a CONST_INT.  */
 334
 335 rtx
 336 negate_rtx (machine_mode mode, rtx x)
 337 {
 338   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 339
 340   if (result == 0)
 341     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 342
 343   return result;
 344 }
 345
 346 /* Adjust bitfield memory MEM so that it points to the first unit of mode
 347    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
 348    If MODE is BLKmode, return a reference to every byte in the bitfield.
 349    Set *NEW_BITNUM to the bit position of the field within the new memory.  */
 350
 351 static rtx
 352 narrow_bit_field_mem (rtx mem, machine_mode mode,
 353                       unsigned HOST_WIDE_INT bitsize,
 354                       unsigned HOST_WIDE_INT bitnum,
 355                       unsigned HOST_WIDE_INT *new_bitnum)
 356 {
 357   if (mode == BLKmode)
 358     {
 359       *new_bitnum = bitnum % BITS_PER_UNIT;
 360       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 361       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 362                             / BITS_PER_UNIT);
 363       return adjust_bitfield_address_size (mem, mode, offset, size);
 364     }
 365   else
 366     {
 367       unsigned int unit = GET_MODE_BITSIZE (mode);
 368       *new_bitnum = bitnum % unit;
 369       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 370       return adjust_bitfield_address (mem, mode, offset);
 371     }
 372 }
 373
 374 /* The caller wants to perform insertion or extraction PATTERN on a
 375    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 376    BITREGION_START and BITREGION_END are as for store_bit_field
 377    and FIELDMODE is the natural mode of the field.
 378
 379    Search for a mode that is compatible with the memory access
 380    restrictions and (where applicable) with a register insertion or
 381    extraction.  Return the new memory on success, storing the adjusted
 382    bit position in *NEW_BITNUM.  Return null otherwise.  */
 383
 384 static rtx
 385 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 386                               rtx op0, HOST_WIDE_INT bitsize,
 387                               HOST_WIDE_INT bitnum,
 388                               unsigned HOST_WIDE_INT bitregion_start,
 389                               unsigned HOST_WIDE_INT bitregion_end,
 390                               machine_mode fieldmode,
 391                               unsigned HOST_WIDE_INT *new_bitnum)
 392 {
 393   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 394                                 bitregion_end, MEM_ALIGN (op0),
 395                                 MEM_VOLATILE_P (op0));
 396   machine_mode best_mode;
 397   if (iter.next_mode (&best_mode))
 398     {
 399       /* We can use a memory in BEST_MODE.  See whether this is true for
 400          any wider modes.  All other things being equal, we prefer to
 401          use the widest mode possible because it tends to expose more
 402          CSE opportunities.  */
 403       if (!iter.prefer_smaller_modes ())
 404         {
 405           /* Limit the search to the mode required by the corresponding
 406              register insertion or extraction instruction, if any.  */
 407           machine_mode limit_mode = word_mode;
 408           extraction_insn insn;
 409           if (get_best_reg_extraction_insn (&insn, pattern,
 410                                             GET_MODE_BITSIZE (best_mode),
 411                                             fieldmode))
 412             limit_mode = insn.field_mode;
 413
 414           machine_mode wider_mode;
 415           while (iter.next_mode (&wider_mode)
 416                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 417             best_mode = wider_mode;
 418         }
 419       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 420                                    new_bitnum);
 421     }
 422   return NULL_RTX;
 423 }
 424
 425 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 426    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 427    offset is then BITNUM / BITS_PER_UNIT.  */
 428
 429 static bool
 430 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
 431                      unsigned HOST_WIDE_INT bitsize,
 432                      machine_mode struct_mode)
 433 {
 434   if (BYTES_BIG_ENDIAN)
 435     return (bitnum % BITS_PER_UNIT == 0
 436             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
 437                 || (bitnum + bitsize) % BITS_PER_WORD == 0));
 438   else
 439     return bitnum % BITS_PER_WORD == 0;
 440 }
 441
 442 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 443    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 444    Return false if the access would touch memory outside the range
 445    BITREGION_START to BITREGION_END for conformance to the C++ memory
 446    model.  */
 447
 448 static bool
 449 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 450                             unsigned HOST_WIDE_INT bitnum,
 451                             machine_mode fieldmode,
 452                             unsigned HOST_WIDE_INT bitregion_start,
 453                             unsigned HOST_WIDE_INT bitregion_end)
 454 {
 455   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 456
 457   /* -fstrict-volatile-bitfields must be enabled and we must have a
 458      volatile MEM.  */
 459   if (!MEM_P (op0)
 460       || !MEM_VOLATILE_P (op0)
 461       || flag_strict_volatile_bitfields <= 0)
 462     return false;
 463
 464   /* Non-integral modes likely only happen with packed structures.
 465      Punt.  */
 466   if (!SCALAR_INT_MODE_P (fieldmode))
 467     return false;
 468
 469   /* The bit size must not be larger than the field mode, and
 470      the field mode must not be larger than a word.  */
 471   if (bitsize > modesize || modesize > BITS_PER_WORD)
 472     return false;
 473
 474   /* Check for cases of unaligned fields that must be split.  */
 475   if (bitnum % modesize + bitsize > modesize)
 476     return false;
 477
 478   /* The memory must be sufficiently aligned for a MODESIZE access.
 479      This condition guarantees, that the memory access will not
 480      touch anything after the end of the structure.  */
 481   if (MEM_ALIGN (op0) < modesize)
 482     return false;
 483
 484   /* Check for cases where the C++ memory model applies.  */
 485   if (bitregion_end != 0
 486       && (bitnum - bitnum % modesize < bitregion_start
 487           || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
 488     return false;
 489
 490   return true;
 491 }
 492
 493 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 494    bit number BITNUM can be treated as a simple value of mode MODE.  */
 495
 496 static bool
 497 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 498                        unsigned HOST_WIDE_INT bitnum, machine_mode mode)
 499 {
 500   return (MEM_P (op0)
 501           && bitnum % BITS_PER_UNIT == 0
 502           && bitsize == GET_MODE_BITSIZE (mode)
 503           && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
 504               || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
 505                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 506 }
 507 \f
 508 /* Try to use instruction INSV to store VALUE into a field of OP0.
 509    BITSIZE and BITNUM are as for store_bit_field.  */
 510
 511 static bool
 512 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 513                             unsigned HOST_WIDE_INT bitsize,
 514                             unsigned HOST_WIDE_INT bitnum,
 515                             rtx value)
 516 {
 517   struct expand_operand ops[4];
 518   rtx value1;
 519   rtx xop0 = op0;
 520   rtx_insn *last = get_last_insn ();
 521   bool copy_back = false;
 522
 523   machine_mode op_mode = insv->field_mode;
 524   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 525   if (bitsize == 0 || bitsize > unit)
 526     return false;
 527
 528   if (MEM_P (xop0))
 529     /* Get a reference to the first byte of the field.  */
 530     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 531                                  &bitnum);
 532   else
 533     {
 534       /* Convert from counting within OP0 to counting in OP_MODE.  */
 535       if (BYTES_BIG_ENDIAN)
 536         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 537
 538       /* If xop0 is a register, we need it in OP_MODE
 539          to make it acceptable to the format of insv.  */
 540       if (GET_CODE (xop0) == SUBREG)
 541         /* We can't just change the mode, because this might clobber op0,
 542            and we will need the original value of op0 if insv fails.  */
 543         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 544       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 545         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 546     }
 547
 548   /* If the destination is a paradoxical subreg such that we need a
 549      truncate to the inner mode, perform the insertion on a temporary and
 550      truncate the result to the original destination.  Note that we can't
 551      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 552      X) 0)) is (reg:N X).  */
 553   if (GET_CODE (xop0) == SUBREG
 554       && REG_P (SUBREG_REG (xop0))
 555       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 556                                          op_mode))
 557     {
 558       rtx tem = gen_reg_rtx (op_mode);
 559       emit_move_insn (tem, xop0);
 560       xop0 = tem;
 561       copy_back = true;
 562     }
 563
 564   /* There are similar overflow check at the start of store_bit_field_1,
 565      but that only check the situation where the field lies completely
 566      outside the register, while there do have situation where the field
 567      lies partialy in the register, we need to adjust bitsize for this
 568      partial overflow situation.  Without this fix, pr48335-2.c on big-endian
 569      will broken on those arch support bit insert instruction, like arm, aarch64
 570      etc.  */
 571   if (bitsize + bitnum > unit && bitnum < unit)
 572     {
 573       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
 574                "destination object, data truncated into %wu-bit",
 575                bitsize, unit - bitnum);
 576       bitsize = unit - bitnum;
 577     }
 578
 579   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 580      "backwards" from the size of the unit we are inserting into.
 581      Otherwise, we count bits from the most significant on a
 582      BYTES/BITS_BIG_ENDIAN machine.  */
 583
 584   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 585     bitnum = unit - bitsize - bitnum;
 586
 587   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 588   value1 = value;
 589   if (GET_MODE (value) != op_mode)
 590     {
 591       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 592         {
 593           /* Optimization: Don't bother really extending VALUE
 594              if it has all the bits we will actually use.  However,
 595              if we must narrow it, be sure we do it correctly.  */
 596
 597           if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 598             {
 599               rtx tmp;
 600
 601               tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 602               if (! tmp)
 603                 tmp = simplify_gen_subreg (op_mode,
 604                                            force_reg (GET_MODE (value),
 605                                                       value1),
 606                                            GET_MODE (value), 0);
 607               value1 = tmp;
 608             }
 609           else
 610             value1 = gen_lowpart (op_mode, value1);
 611         }
 612       else if (CONST_INT_P (value))
 613         value1 = gen_int_mode (INTVAL (value), op_mode);
 614       else
 615         /* Parse phase is supposed to make VALUE's data type
 616            match that of the component reference, which is a type
 617            at least as wide as the field; so VALUE should have
 618            a mode that corresponds to that type.  */
 619         gcc_assert (CONSTANT_P (value));
 620     }
 621
 622   create_fixed_operand (&ops[0], xop0);
 623   create_integer_operand (&ops[1], bitsize);
 624   create_integer_operand (&ops[2], bitnum);
 625   create_input_operand (&ops[3], value1, op_mode);
 626   if (maybe_expand_insn (insv->icode, 4, ops))
 627     {
 628       if (copy_back)
 629         convert_move (op0, xop0, true);
 630       return true;
 631     }
 632   delete_insns_since (last);
 633   return false;
 634 }
 635
 636 /* A subroutine of store_bit_field, with the same arguments.  Return true
 637    if the operation could be implemented.
 638
 639    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 640    no other way of implementing the operation.  If FALLBACK_P is false,
 641    return false instead.  */
 642
 643 static bool
 644 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 645                    unsigned HOST_WIDE_INT bitnum,
 646                    unsigned HOST_WIDE_INT bitregion_start,
 647                    unsigned HOST_WIDE_INT bitregion_end,
 648                    machine_mode fieldmode,
 649                    rtx value, bool fallback_p)
 650 {
 651   rtx op0 = str_rtx;
 652   rtx orig_value;
 653
 654   while (GET_CODE (op0) == SUBREG)
 655     {
 656       /* The following line once was done only if WORDS_BIG_ENDIAN,
 657          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 658          meaningful at a much higher level; when structures are copied
 659          between memory and regs, the higher-numbered regs
 660          always get higher addresses.  */
 661       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 662       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 663       int byte_offset = 0;
 664
 665       /* Paradoxical subregs need special handling on big endian machines.  */
 666       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 667         {
 668           int difference = inner_mode_size - outer_mode_size;
 669
 670           if (WORDS_BIG_ENDIAN)
 671             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 672           if (BYTES_BIG_ENDIAN)
 673             byte_offset += difference % UNITS_PER_WORD;
 674         }
 675       else
 676         byte_offset = SUBREG_BYTE (op0);
 677
 678       bitnum += byte_offset * BITS_PER_UNIT;
 679       op0 = SUBREG_REG (op0);
 680     }
 681
 682   /* No action is needed if the target is a register and if the field
 683      lies completely outside that register.  This can occur if the source
 684      code contains an out-of-bounds access to a small array.  */
 685   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 686     return true;
 687
 688   /* Use vec_set patterns for inserting parts of vectors whenever
 689      available.  */
 690   if (VECTOR_MODE_P (GET_MODE (op0))
 691       && !MEM_P (op0)
 692       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 693       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 694       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 695       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 696     {
 697       struct expand_operand ops[3];
 698       machine_mode outermode = GET_MODE (op0);
 699       machine_mode innermode = GET_MODE_INNER (outermode);
 700       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 701       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 702
 703       create_fixed_operand (&ops[0], op0);
 704       create_input_operand (&ops[1], value, innermode);
 705       create_integer_operand (&ops[2], pos);
 706       if (maybe_expand_insn (icode, 3, ops))
 707         return true;
 708     }
 709
 710   /* If the target is a register, overwriting the entire object, or storing
 711      a full-word or multi-word field can be done with just a SUBREG.  */
 712   if (!MEM_P (op0)
 713       && bitsize == GET_MODE_BITSIZE (fieldmode)
 714       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 715           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 716     {
 717       /* Use the subreg machinery either to narrow OP0 to the required
 718          words or to cope with mode punning between equal-sized modes.
 719          In the latter case, use subreg on the rhs side, not lhs.  */
 720       rtx sub;
 721
 722       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 723         {
 724           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 725           if (sub)
 726             {
 727               emit_move_insn (op0, sub);
 728               return true;
 729             }
 730         }
 731       else
 732         {
 733           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 734                                      bitnum / BITS_PER_UNIT);
 735           if (sub)
 736             {
 737               emit_move_insn (sub, value);
 738               return true;
 739             }
 740         }
 741     }
 742
 743   /* If the target is memory, storing any naturally aligned field can be
 744      done with a simple store.  For targets that support fast unaligned
 745      memory, any naturally sized, unit aligned field can be done directly.  */
 746   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
 747     {
 748       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 749       emit_move_insn (op0, value);
 750       return true;
 751     }
 752
 753   /* Make sure we are playing with integral modes.  Pun with subregs
 754      if we aren't.  This must come after the entire register case above,
 755      since that case is valid for any mode.  The following cases are only
 756      valid for integral modes.  */
 757   {
 758     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 759     if (imode != GET_MODE (op0))
 760       {
 761         if (MEM_P (op0))
 762           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
 763         else
 764           {
 765             gcc_assert (imode != BLKmode);
 766             op0 = gen_lowpart (imode, op0);
 767           }
 768       }
 769   }
 770
 771   /* Storing an lsb-aligned field in a register
 772      can be done with a movstrict instruction.  */
 773
 774   if (!MEM_P (op0)
 775       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 776       && bitsize == GET_MODE_BITSIZE (fieldmode)
 777       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 778     {
 779       struct expand_operand ops[2];
 780       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 781       rtx arg0 = op0;
 782       unsigned HOST_WIDE_INT subreg_off;
 783
 784       if (GET_CODE (arg0) == SUBREG)
 785         {
 786           /* Else we've got some float mode source being extracted into
 787              a different float mode destination -- this combination of
 788              subregs results in Severe Tire Damage.  */
 789           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 790                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 791                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 792           arg0 = SUBREG_REG (arg0);
 793         }
 794
 795       subreg_off = bitnum / BITS_PER_UNIT;
 796       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 797         {
 798           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 799
 800           create_fixed_operand (&ops[0], arg0);
 801           /* Shrink the source operand to FIELDMODE.  */
 802           create_convert_operand_to (&ops[1], value, fieldmode, false);
 803           if (maybe_expand_insn (icode, 2, ops))
 804             return true;
 805         }
 806     }
 807
 808   /* Handle fields bigger than a word.  */
 809
 810   if (bitsize > BITS_PER_WORD)
 811     {
 812       /* Here we transfer the words of the field
 813          in the order least significant first.
 814          This is because the most significant word is the one which may
 815          be less than full.
 816          However, only do that if the value is not BLKmode.  */
 817
 818       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 819       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 820       unsigned int i;
 821       rtx_insn *last;
 822
 823       /* This is the mode we must force value to, so that there will be enough
 824          subwords to extract.  Note that fieldmode will often (always?) be
 825          VOIDmode, because that is what store_field uses to indicate that this
 826          is a bit field, but passing VOIDmode to operand_subword_force
 827          is not allowed.  */
 828       fieldmode = GET_MODE (value);
 829       if (fieldmode == VOIDmode)
 830         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 831
 832       last = get_last_insn ();
 833       for (i = 0; i < nwords; i++)
 834         {
 835           /* If I is 0, use the low-order word in both field and target;
 836              if I is 1, use the next to lowest word; and so on.  */
 837           unsigned int wordnum = (backwards
 838                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 839                                   - i - 1
 840                                   : i);
 841           unsigned int bit_offset = (backwards
 842                                      ? MAX ((int) bitsize - ((int) i + 1)
 843                                             * BITS_PER_WORD,
 844                                             0)
 845                                      : (int) i * BITS_PER_WORD);
 846           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 847           unsigned HOST_WIDE_INT new_bitsize =
 848             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 849
 850           /* If the remaining chunk doesn't have full wordsize we have
 851              to make sure that for big endian machines the higher order
 852              bits are used.  */
 853           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 854             value_word = simplify_expand_binop (word_mode, lshr_optab,
 855                                                 value_word,
 856                                                 GEN_INT (BITS_PER_WORD
 857                                                          - new_bitsize),
 858                                                 NULL_RTX, true,
 859                                                 OPTAB_LIB_WIDEN);
 860
 861           if (!store_bit_field_1 (op0, new_bitsize,
 862                                   bitnum + bit_offset,
 863                                   bitregion_start, bitregion_end,
 864                                   word_mode,
 865                                   value_word, fallback_p))
 866             {
 867               delete_insns_since (last);
 868               return false;
 869             }
 870         }
 871       return true;
 872     }
 873
 874   /* If VALUE has a floating-point or complex mode, access it as an
 875      integer of the corresponding size.  This can occur on a machine
 876      with 64 bit registers that uses SFmode for float.  It can also
 877      occur for unaligned float or complex fields.  */
 878   orig_value = value;
 879   if (GET_MODE (value) != VOIDmode
 880       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 881       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 882     {
 883       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 884       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 885     }
 886
 887   /* If OP0 is a multi-word register, narrow it to the affected word.
 888      If the region spans two words, defer to store_split_bit_field.  */
 889   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 890     {
 891       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 892                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 893       gcc_assert (op0);
 894       bitnum %= BITS_PER_WORD;
 895       if (bitnum + bitsize > BITS_PER_WORD)
 896         {
 897           if (!fallback_p)
 898             return false;
 899
 900           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 901                                  bitregion_end, value);
 902           return true;
 903         }
 904     }
 905
 906   /* From here on we can assume that the field to be stored in fits
 907      within a word.  If the destination is a register, it too fits
 908      in a word.  */
 909
 910   extraction_insn insv;
 911   if (!MEM_P (op0)
 912       && get_best_reg_extraction_insn (&insv, EP_insv,
 913                                        GET_MODE_BITSIZE (GET_MODE (op0)),
 914                                        fieldmode)
 915       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 916     return true;
 917
 918   /* If OP0 is a memory, try copying it to a register and seeing if a
 919      cheap register alternative is available.  */
 920   if (MEM_P (op0))
 921     {
 922       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
 923                                         fieldmode)
 924           && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 925         return true;
 926
 927       rtx_insn *last = get_last_insn ();
 928
 929       /* Try loading part of OP0 into a register, inserting the bitfield
 930          into that, and then copying the result back to OP0.  */
 931       unsigned HOST_WIDE_INT bitpos;
 932       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
 933                                                bitregion_start, bitregion_end,
 934                                                fieldmode, &bitpos);
 935       if (xop0)
 936         {
 937           rtx tempreg = copy_to_reg (xop0);
 938           if (store_bit_field_1 (tempreg, bitsize, bitpos,
 939                                  bitregion_start, bitregion_end,
 940                                  fieldmode, orig_value, false))
 941             {
 942               emit_move_insn (xop0, tempreg);
 943               return true;
 944             }
 945           delete_insns_since (last);
 946         }
 947     }
 948
 949   if (!fallback_p)
 950     return false;
 951
 952   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
 953                          bitregion_end, value);
 954   return true;
 955 }
 956
 957 /* Emit RTL that stores VALUE into the bit-field of STR_RTX which is
 958    BITSIZE bits wide and begins at bit BITNUM.
 959
 960    BITREGION_START and BITREGION_END are the bit positions of the first
 961    and of the ending bit-field of the region this field belongs to.  Both
 962    are 0 when the C++ memory model does not apply or when bit-field
 963    regions are not being tracked.
 964
 965    FIELDMODE is the machine mode of the FIELD_DECL node of this field.  */
 966
 967 void
 968 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 969                  unsigned HOST_WIDE_INT bitnum,
 970                  unsigned HOST_WIDE_INT bitregion_start,
 971                  unsigned HOST_WIDE_INT bitregion_end,
 972                  machine_mode fieldmode,
 973                  rtx value)
 974 {
 975   /* First deal with the accesses -fstrict-volatile-bitfields governs.  */
 976   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
 977                                   bitregion_start, bitregion_end))
 978     {
 979       if (bitsize != GET_MODE_BITSIZE (fieldmode))
 980         {
 981           /* The field does not fill FIELDMODE.  Copy the narrowed memory
 982              reference into a register, insert the field there and store
 983              the register back.  */
 984           str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
 985                                           &bitnum);
 986           gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
 987
 988           rtx unit_reg = copy_to_reg (str_rtx);
 989           bool stored = store_bit_field_1 (unit_reg, bitsize, bitnum, 0, 0,
 990                                            fieldmode, value, true);
 991           if (!stored)
 992             gcc_unreachable ();
 993
 994           emit_move_insn (str_rtx, unit_reg);
 995           return;
 996         }
 997
 998       /* The field fills all of FIELDMODE, so a plain store does the job.
 999          We know here that one single instruction can access the field;
1000          on targets that support unaligned memory that access may have
1001          to be an unaligned one.  */
1002       str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
1003                                          bitnum / BITS_PER_UNIT);
1004       gcc_assert (bitnum % BITS_PER_UNIT == 0);
1005       emit_move_insn (str_rtx, value);
1006       return;
1007     }
1008
1009   /* The C++0x memory model forbids touching bits outside the bit region,
1010      so rebase the memory reference, BITNUM and BITREGION_END onto the
1011      first byte of the region.  */
1012   if (MEM_P (str_rtx) && bitregion_start > 0)
1013     {
1014       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1015
1016       HOST_WIDE_INT region_byte = bitregion_start / BITS_PER_UNIT;
1017       bitnum -= bitregion_start;
1018       bitregion_end -= bitregion_start;
1019       bitregion_start = 0;
1020
1021       HOST_WIDE_INT nbytes
1022         = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1023       machine_mode region_mode
1024         = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1025                          MEM_ALIGN (str_rtx), VOIDmode,
1026                          MEM_VOLATILE_P (str_rtx));
1027       str_rtx = adjust_bitfield_address_size (str_rtx, region_mode,
1028                                               region_byte, nbytes);
1029     }
1030
1031   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1032                           bitregion_start, bitregion_end,
1033                           fieldmode, value, true))
1034     gcc_unreachable ();
1035 }
1036 \f
1037 /* Store VALUE into the BITSIZE-bit wide field of OP0 that starts at bit
1038    BITNUM, using shifts and boolean operations.  */
1039
1040 static void
1041 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1042                        unsigned HOST_WIDE_INT bitnum,
1043                        unsigned HOST_WIDE_INT bitregion_start,
1044                        unsigned HOST_WIDE_INT bitregion_end,
1045                        rtx value)
1046 {
1047   /* One case is not handled here: a structure whose known alignment is
1048      just a halfword (or just a byte) with a field split across two
1049      aligned halfwords (respectively two bytes) within the structure.
1050      Such cases are not supposed to be able to occur.  */
1051
1052   if (!MEM_P (op0))
1053     {
1054       store_fixed_bit_field_1 (op0, bitsize, bitnum, value);
1055       return;
1056     }
1057
1058   /* Hand get_best_mode the mode of OP0, or word_mode when that mode has
1059      no size or is wider than a word.  */
1060   machine_mode limit = GET_MODE (op0);
1061   if (GET_MODE_BITSIZE (limit) == 0
1062       || GET_MODE_BITSIZE (limit) > GET_MODE_BITSIZE (word_mode))
1063     limit = word_mode;
1064   machine_mode access_mode
1065     = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1066                      MEM_ALIGN (op0), limit, MEM_VOLATILE_P (op0));
1067   if (access_mode != VOIDmode)
1068     {
1069       op0 = narrow_bit_field_mem (op0, access_mode, bitsize, bitnum, &bitnum);
1070       store_fixed_bit_field_1 (op0, bitsize, bitnum, value);
1071     }
1072   else
1073     /* Should only occur if the field spans word boundaries.  */
1074     store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1075                            bitregion_end, value);
1076 }
1077
1078 /* Helper function for store_fixed_bit_field, stores
1079    the bit field always using the MODE of OP0.
        OP0 must have a scalar integer mode; this is asserted.  BITSIZE is
        the width of the field and BITNUM the position of its first bit in
        OP0, counted from the most significant bit if BYTES_BIG_ENDIAN and
        from the least significant bit otherwise.  VALUE is the value to
        store.
        The store is expanded as a read-modify-write: VALUE is masked and
        shifted into position, the field is cleared in a register copy of
        OP0, the two are IORed together and the result is moved back into
        OP0.  */
1080
1081 static void
1082 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1083                          unsigned HOST_WIDE_INT bitnum,
1084                          rtx value)
1085 {
1086   machine_mode mode;
1087   rtx temp;
       /* ALL_ZERO, respectively ALL_ONE, is set when VALUE is a CONST_INT
          whose low BITSIZE bits are all zeros, respectively all ones.  */
1088   int all_zero = 0;
1089   int all_one = 0;
1090
1091   mode = GET_MODE (op0);
1092   gcc_assert (SCALAR_INT_MODE_P (mode));
1093
1094   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1095      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1096
1097   if (BYTES_BIG_ENDIAN)
1098     /* BITNUM is the distance between our msb
1099        and that of the containing datum.
1100        Convert it to the distance from the lsb.  */
1101     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1102
1103   /* Now BITNUM is always the distance between our lsb
1104      and that of OP0.  */
1105
1106   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1107      we must first convert its mode to MODE.  */
1108
1109   if (CONST_INT_P (value))
1110     {
1111       unsigned HOST_WIDE_INT v = UINTVAL (value);
1112
           /* Keep only the low BITSIZE bits of the constant.  The shift is
              skipped when BITSIZE is not below HOST_BITS_PER_WIDE_INT, in
              which case V is used unmasked.  */
1113       if (bitsize < HOST_BITS_PER_WIDE_INT)
1114         v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;
1115
           /* Note the two constants that allow one of the logical
              operations further down to be skipped.  */
1116       if (v == 0)
1117         all_zero = 1;
1118       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1119                 && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
1120                || (bitsize == HOST_BITS_PER_WIDE_INT
1121                    && v == (unsigned HOST_WIDE_INT) -1))
1122         all_one = 1;
1123
1124       value = lshift_value (mode, v, bitnum);
1125     }
1126   else
1127     {
           /* The AND below is done only when VALUE's mode is not exactly
              BITSIZE bits wide and the field does not end at the most
              significant bit of MODE.  NOTE(review): presumably the
              unsigned conversion clears the upper bits in the first case
              and the left shift discards them in the second -- confirm.  */
1128       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1129                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1130
           /* The 1 is convert_to_mode's UNSIGNEDP argument.  */
1131       if (GET_MODE (value) != mode)
1132         value = convert_to_mode (mode, value, 1);
1133
           /* NOTE(review): mask_rtx (mode, 0, bitsize, 0) is presumably a
              mask of BITSIZE ones starting at bit 0 -- confirm against
              mask_rtx.  */
1134       if (must_and)
1135         value = expand_binop (mode, and_optab, value,
1136                               mask_rtx (mode, 0, bitsize, 0),
1137                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1138       if (bitnum > 0)
1139         value = expand_shift (LSHIFT_EXPR, mode, value,
1140                               bitnum, NULL_RTX, 1);
1141     }
1142
1143   /* Now clear the chosen bits in OP0,
1144      except that if VALUE is -1 we need not bother.  */
1145   /* We keep the intermediates in registers to allow CSE to combine
1146      consecutive bitfield assignments.  */
1147
1148   temp = force_reg (mode, op0);
1149
       /* NOTE(review): the final 1 passed to mask_rtx presumably requests
          the complemented mask, i.e. zeros over the field -- confirm
          against mask_rtx.  */
1150   if (! all_one)
1151     {
1152       temp = expand_binop (mode, and_optab, temp,
1153                            mask_rtx (mode, bitnum, bitsize, 1),
1154                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1155       temp = force_reg (mode, temp);
1156     }
1157
1158   /* Now logical-or VALUE into OP0, unless it is zero.  */
1159
1160   if (! all_zero)
1161     {
1162       temp = expand_binop (mode, ior_optab, temp, value,
1163                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1164       temp = force_reg (mode, temp);
1165     }
1166
       /* Move the result into OP0 unless TEMP and OP0 are the same rtx.  */
1167   if (op0 != temp)
1168     {
1169       op0 = copy_rtx (op0);
1170       emit_move_insn (op0, temp);
1171     }
1172 }
1173 \f
1174 /* Store a bit field that is split across multiple accessible memory objects.
1175
1176    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1177    BITSIZE is the field width; BITPOS the position of its first bit
1178    (within the word).
1179    VALUE is the value to store.
        BITREGION_START and BITREGION_END are passed on to
        store_fixed_bit_field.  In addition, unless OP0 is a REG or a
        SUBREG of a REG, a nonzero BITREGION_END makes the loop below
        halve UNIT whenever the current UNIT would extend beyond bit
        BITREGION_END.
1180
        The field is stored piecewise: each storing iteration takes the
        next chunk of VALUE, which never crosses a UNIT boundary, and
        hands it to store_fixed_bit_field.
1181    This does not yet handle fields wider than BITS_PER_WORD.  */
1182
1183 static void
1184 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1185                        unsigned HOST_WIDE_INT bitpos,
1186                        unsigned HOST_WIDE_INT bitregion_start,
1187                        unsigned HOST_WIDE_INT bitregion_end,
1188                        rtx value)
1189 {
       /* UNIT is the width in bits of the pieces OP0 is accessed in;
          BITSDONE counts the bits of the field stored so far.  */
1190   unsigned int unit;
1191   unsigned int bitsdone = 0;
1192
1193   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1194      much at a time.  */
1195   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1196     unit = BITS_PER_WORD;
1197   else
1198     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1199
1200   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1201      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1202      again, and we will mutually recurse forever.  */
1203   if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1204     unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1205
1206   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1207      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1208      that VALUE might be a floating-point constant.  */
1209   if (CONSTANT_P (value) && !CONST_INT_P (value))
1210     {
1211       rtx word = gen_lowpart_common (word_mode, value);
1212
1213       if (word && (value != word))
1214         value = word;
1215       else
1216         value = gen_lowpart_common (word_mode,
1217                                     force_reg (GET_MODE (value) != VOIDmode
1218                                                ? GET_MODE (value)
1219                                                : word_mode, value));
1220     }
1221
1222   while (bitsdone < bitsize)
1223     {
1224       unsigned HOST_WIDE_INT thissize;
1225       rtx part, word;
1226       unsigned HOST_WIDE_INT thispos;
1227       unsigned HOST_WIDE_INT offset;
1228
           /* OFFSET is the index of the UNIT holding the next bit to store
              and THISPOS the position of that bit within the UNIT.  */
1229       offset = (bitpos + bitsdone) / unit;
1230       thispos = (bitpos + bitsdone) % unit;
1231
1232       /* When region of bytes we can touch is restricted, decrease
1233          UNIT close to the end of the region as needed.  If op0 is a REG
1234          or SUBREG of REG, don't do this, as there can't be data races
1235          on a register and we can expand shorter code in some cases.  */
1236       if (bitregion_end
1237           && unit > BITS_PER_UNIT
1238           && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1239           && !REG_P (op0)
1240           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1241         {
               /* Retry with the smaller UNIT; OFFSET and THISPOS are
                  recomputed at the top of the loop and nothing has been
                  stored in this iteration.  */
1242           unit = unit / 2;
1243           continue;
1244         }
1245
1246       /* THISSIZE must not overrun a word boundary.  Otherwise,
1247          store_fixed_bit_field will call us again, and we will mutually
1248          recurse forever.  */
1249       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1250       thissize = MIN (thissize, unit - thispos);
1251
1252       if (BYTES_BIG_ENDIAN)
1253         {
1254           /* Fetch successively less significant portions.  */
1255           if (CONST_INT_P (value))
1256             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1257                              >> (bitsize - bitsdone - thissize))
1258                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1259           else
1260             {
1261               int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1262               /* The args are chosen so that the last part includes the
1263                  lsb.  Give extract_bit_field the value it needs (with
1264                  endianness compensation) to fetch the piece we want.  */
1265               part = extract_fixed_bit_field (word_mode, value, thissize,
1266                                               total_bits - bitsize + bitsdone,
1267                                               NULL_RTX, 1);
1268             }
1269         }
1270       else
1271         {
1272           /* Fetch successively more significant portions.  */
1273           if (CONST_INT_P (value))
1274             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1275                              >> bitsdone)
1276                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1277           else
1278             part = extract_fixed_bit_field (word_mode, value, thissize,
1279                                             bitsdone, NULL_RTX, 1);
1280         }
1281
1282       /* If OP0 is a register, then handle OFFSET here.
1283
1284          When handling multiword bitfields, extract_bit_field may pass
1285          down a word_mode SUBREG of a larger REG for a bitfield that actually
1286          crosses a word boundary.  Thus, for a SUBREG, we must find
1287          the current word starting from the base register.  */
1288       if (GET_CODE (op0) == SUBREG)
1289         {
1290           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1291                             + (offset * unit / BITS_PER_WORD);
1292           machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
               /* If the inner register is smaller than a word, word 0 is OP0
                  itself and any other word is flagged with const0_rtx, which
                  the store further down skips.  */
1293           if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1294             word = word_offset ? const0_rtx : op0;
1295           else
1296             word = operand_subword_force (SUBREG_REG (op0), word_offset,
1297                                           GET_MODE (SUBREG_REG (op0)));
               /* WORD already selects the word, so keep only the low bits of
                  OFFSET: the index of the UNIT within that word, provided
                  BITS_PER_WORD / unit is a power of two.  The mask parses
                  as (BITS_PER_WORD / unit) - 1.  */
1298           offset &= BITS_PER_WORD / unit - 1;
1299         }
1300       else if (REG_P (op0))
1301         {
1302           machine_mode op0_mode = GET_MODE (op0);
               /* Same treatment as in the SUBREG case, with OP0 itself as
                  the register and OFFSET selecting the word.  */
1303           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1304             word = offset ? const0_rtx : op0;
1305           else
1306             word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1307                                           GET_MODE (op0));
1308           offset &= BITS_PER_WORD / unit - 1;
1309         }
1310       else
1311         word = op0;
1312
1313       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1314          it is just an out-of-bounds access.  Ignore it.  */
1315       if (word != const0_rtx)
1316         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1317                                bitregion_start, bitregion_end, part);
1318       bitsdone += thissize;
1319     }
1320 }
1321 \f
1322 /* Helper for extract_bit_field_1: bring the extracted value X into
1323    either MODE or TMODE.  MODE, TMODE and UNSIGNEDP have the meaning
1324    they have as arguments to extract_bit_field.  */
1325
1326 static rtx
1327 convert_extracted_bit_field (rtx x, machine_mode mode,
1328                              machine_mode tmode, bool unsignedp)
1329 {
1330   machine_mode xmode = GET_MODE (x);
1331
1332   /* Nothing to do if X already has one of the two acceptable modes.  */
1333   if (xmode == mode || xmode == tmode)
1334     return x;
1335
1336   if (SCALAR_INT_MODE_P (tmode))
1337     return convert_to_mode (tmode, x, unsignedp);
1338
1339   /* TMODE is not a scalar integer mode.  Convert X to the integer mode
1340      of TMODE's size, force that into a register and then access the
1341      register in TMODE via gen_lowpart.  */
1342   machine_mode int_mode
1343     = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1344   rtx int_val = convert_to_mode (int_mode, x, unsignedp);
1345   int_val = force_reg (int_mode, int_val);
1346   return gen_lowpart (tmode, int_val);
1347 }
1348
1349 /* Try to use an ext(z)v pattern to extract a field from OP0.
1350    Return the extracted value on success, otherwise return null.
1351    EXTV describes the pattern to use: its field_mode is the mode of
1352    the extraction (EXT_MODE below), its struct_mode is used to narrow
        a MEM OP0 and its icode is the instruction that gets expanded.
        The other arguments are as for extract_bit_field.
        Null is returned when BITSIZE is 0 or exceeds the size of EXT_MODE,
        when OP0 is a SUBREG whose mode is not EXT_MODE, and when the
        instruction cannot be expanded.  */
1353
1354 static rtx
1355 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1356                               unsigned HOST_WIDE_INT bitsize,
1357                               unsigned HOST_WIDE_INT bitnum,
1358                               int unsignedp, rtx target,
1359                               machine_mode mode, machine_mode tmode)
1360 {
1361   struct expand_operand ops[4];
       /* SPEC_TARGET is the rtx handed back when the pattern writes the
          result straight into the requested place.  SPEC_TARGET_SUBREG is
          its EXT_MODE lowpart, recorded only when EXT_MODE has the greater
          precision.  */
1362   rtx spec_target = target;
1363   rtx spec_target_subreg = 0;
1364   machine_mode ext_mode = extv->field_mode;
1365   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1366
       /* Fail for an empty field and for one wider than EXT_MODE.  */
1367   if (bitsize == 0 || unit < bitsize)
1368     return NULL_RTX;
1369
1370   if (MEM_P (op0))
1371     /* Get a reference to the first byte of the field.  */
1372     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1373                                 &bitnum);
1374   else
1375     {
1376       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1377       if (BYTES_BIG_ENDIAN)
1378         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1379
1380       /* If op0 is a register, we need it in EXT_MODE to make it
1381          acceptable to the format of ext(z)v.  */
1382       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1383         return NULL_RTX;
1384       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1385         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1386     }
1387
1388   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1389      "backwards" from the size of the unit we are extracting from.
1390      Otherwise, we count bits from the most significant on a
1391      BYTES/BITS_BIG_ENDIAN machine.  */
1392
1393   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1394     bitnum = unit - bitsize - bitnum;
1395
       /* Without a requested TARGET, extract into a fresh TMODE pseudo.  */
1396   if (target == 0)
1397     target = spec_target = gen_reg_rtx (tmode);
1398
1399   if (GET_MODE (target) != ext_mode)
1400     {
1401       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1402          between the mode of the extraction (word_mode) and the target
1403          mode.  Instead, create a temporary and use convert_move to set
1404          the target.  */
1405       if (REG_P (target)
1406           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1407         {
1408           target = gen_lowpart (ext_mode, target);
1409           if (GET_MODE_PRECISION (ext_mode)
1410               > GET_MODE_PRECISION (GET_MODE (spec_target)))
1411             spec_target_subreg = target;
1412         }
1413       else
1414         target = gen_reg_rtx (ext_mode);
1415     }
1416
       /* Operands are, in order: the output, the structure operand, the
          field width and the field position.  */
1417   create_output_operand (&ops[0], target, ext_mode);
1418   create_fixed_operand (&ops[1], op0);
1419   create_integer_operand (&ops[2], bitsize);
1420   create_integer_operand (&ops[3], bitnum);
1421   if (maybe_expand_insn (extv->icode, 4, ops))
1422     {
           /* Read back the output operand.  If it is still SPEC_TARGET, or
              the lowpart SPEC_TARGET_SUBREG of it, SPEC_TARGET is returned
              without conversion; anything else is converted to MODE or
              TMODE.  */
1423       target = ops[0].value;
1424       if (target == spec_target)
1425         return target;
1426       if (target == spec_target_subreg)
1427         return spec_target;
1428       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1429     }
1430   return NULL_RTX;
1431 }
1432
1433 /* A subroutine of extract_bit_field, with the same arguments.
1434    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1435    if we can find no other means of implementing the operation.
1436    If FALLBACK_P is false, return NULL instead.  */
1437
1438 static rtx
1439 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1440                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1441                      machine_mode mode, machine_mode tmode,
1442                      bool fallback_p)
1443 {
1444   rtx op0 = str_rtx;
1445   machine_mode int_mode;
1446   machine_mode mode1;
1447
1448   if (tmode == VOIDmode)
1449     tmode = mode;
1450
1451   while (GET_CODE (op0) == SUBREG)
1452     {
1453       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1454       op0 = SUBREG_REG (op0);
1455     }
1456
1457   /* If we have an out-of-bounds access to a register, just return an
1458      uninitialized register of the required mode.  This can occur if the
1459      source code contains an out-of-bounds access to a small array.  */
1460   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1461     return gen_reg_rtx (tmode);
1462
1463   if (REG_P (op0)
1464       && mode == GET_MODE (op0)
1465       && bitnum == 0
1466       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1467     {
1468       /* We're trying to extract a full register from itself.  */
1469       return op0;
1470     }
1471
1472   /* See if we can get a better vector mode before extracting.  */
1473   if (VECTOR_MODE_P (GET_MODE (op0))
1474       && !MEM_P (op0)
1475       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1476     {
1477       machine_mode new_mode;
1478
1479       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1480         new_mode = MIN_MODE_VECTOR_FLOAT;
1481       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1482         new_mode = MIN_MODE_VECTOR_FRACT;
1483       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1484         new_mode = MIN_MODE_VECTOR_UFRACT;
1485       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1486         new_mode = MIN_MODE_VECTOR_ACCUM;
1487       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1488         new_mode = MIN_MODE_VECTOR_UACCUM;
1489       else
1490         new_mode = MIN_MODE_VECTOR_INT;
1491
1492       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1493         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1494             && targetm.vector_mode_supported_p (new_mode))
1495           break;
1496       if (new_mode != VOIDmode)
1497         op0 = gen_lowpart (new_mode, op0);
1498     }
1499
1500   /* Use vec_extract patterns for extracting parts of vectors whenever
1501      available.  */
1502   if (VECTOR_MODE_P (GET_MODE (op0))
1503       && !MEM_P (op0)
1504       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1505       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1506           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1507     {
1508       struct expand_operand ops[3];
1509       machine_mode outermode = GET_MODE (op0);
1510       machine_mode innermode = GET_MODE_INNER (outermode);
1511       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1512       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1513
1514       create_output_operand (&ops[0], target, innermode);
1515       create_input_operand (&ops[1], op0, outermode);
1516       create_integer_operand (&ops[2], pos);
1517       if (maybe_expand_insn (icode, 3, ops))
1518         {
1519           target = ops[0].value;
1520           if (GET_MODE (target) != mode)
1521             return gen_lowpart (tmode, target);
1522           return target;
1523         }
1524     }
1525
1526   /* Make sure we are playing with integral modes.  Pun with subregs
1527      if we aren't.  */
1528   {
1529     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1530     if (imode != GET_MODE (op0))
1531       {
1532         if (MEM_P (op0))
1533           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1534         else if (imode != BLKmode)
1535           {
1536             op0 = gen_lowpart (imode, op0);
1537
1538             /* If we got a SUBREG, force it into a register since we
1539                aren't going to be able to do another SUBREG on it.  */
1540             if (GET_CODE (op0) == SUBREG)
1541               op0 = force_reg (imode, op0);
1542           }
1543         else if (REG_P (op0))
1544           {
1545             rtx reg, subreg;
1546             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1547                                             MODE_INT);
1548             reg = gen_reg_rtx (imode);
1549             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1550             emit_move_insn (subreg, op0);
1551             op0 = reg;
1552             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1553           }
1554         else
1555           {
1556             HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1557             rtx mem = assign_stack_temp (GET_MODE (op0), size);
1558             emit_move_insn (mem, op0);
1559             op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1560           }
1561       }
1562   }
1563
1564   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1565      If that's wrong, the solution is to test for it and set TARGET to 0
1566      if needed.  */
1567
1568   /* Get the mode of the field to use for atomic access or subreg
1569      conversion.  */
1570   mode1 = mode;
1571   if (SCALAR_INT_MODE_P (tmode))
1572     {
1573       machine_mode try_mode = mode_for_size (bitsize,
1574                                                   GET_MODE_CLASS (tmode), 0);
1575       if (try_mode != BLKmode)
1576         mode1 = try_mode;
1577     }
1578   gcc_assert (mode1 != BLKmode);
1579
1580   /* Extraction of a full MODE1 value can be done with a subreg as long
1581      as the least significant bit of the value is the least significant
1582      bit of either OP0 or a word of OP0.  */
1583   if (!MEM_P (op0)
1584       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1585       && bitsize == GET_MODE_BITSIZE (mode1)
1586       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1587     {
1588       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1589                                      bitnum / BITS_PER_UNIT);
1590       if (sub)
1591         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1592     }
1593
1594   /* Extraction of a full MODE1 value can be done with a load as long as
1595      the field is on a byte boundary and is sufficiently aligned.  */
1596   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1597     {
1598       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1599       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1600     }
1601
1602   /* Handle fields bigger than a word.  */
1603
1604   if (bitsize > BITS_PER_WORD)
1605     {
1606       /* Here we transfer the words of the field
1607          in the order least significant first.
1608          This is because the most significant word is the one which may
1609          be less than full.  */
1610
1611       unsigned int backwards = WORDS_BIG_ENDIAN;
1612       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1613       unsigned int i;
1614       rtx_insn *last;
1615
1616       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1617         target = gen_reg_rtx (mode);
1618
1619       /* Indicate for flow that the entire target reg is being set.  */
1620       emit_clobber (target);
1621
1622       last = get_last_insn ();
1623       for (i = 0; i < nwords; i++)
1624         {
1625           /* If I is 0, use the low-order word in both field and target;
1626              if I is 1, use the next to lowest word; and so on.  */
1627           /* Word number in TARGET to use.  */
1628           unsigned int wordnum
1629             = (backwards
1630                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1631                : i);
1632           /* Offset from start of field in OP0.  */
1633           unsigned int bit_offset = (backwards
1634                                      ? MAX ((int) bitsize - ((int) i + 1)
1635                                             * BITS_PER_WORD,
1636                                             0)
1637                                      : (int) i * BITS_PER_WORD);
1638           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1639           rtx result_part
1640             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1641                                              bitsize - i * BITS_PER_WORD),
1642                                    bitnum + bit_offset, 1, target_part,
1643                                    mode, word_mode, fallback_p);
1644
1645           gcc_assert (target_part);
1646           if (!result_part)
1647             {
1648               delete_insns_since (last);
1649               return NULL;
1650             }
1651
1652           if (result_part != target_part)
1653             emit_move_insn (target_part, result_part);
1654         }
1655
1656       if (unsignedp)
1657         {
1658           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1659              need to be zero'd out.  */
1660           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1661             {
1662               unsigned int i, total_words;
1663
1664               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1665               for (i = nwords; i < total_words; i++)
1666                 emit_move_insn
1667                   (operand_subword (target,
1668                                     backwards ? total_words - i - 1 : i,
1669                                     1, VOIDmode),
1670                    const0_rtx);
1671             }
1672           return target;
1673         }
1674
1675       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1676       target = expand_shift (LSHIFT_EXPR, mode, target,
1677                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1678       return expand_shift (RSHIFT_EXPR, mode, target,
1679                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1680     }
1681
1682   /* If OP0 is a multi-word register, narrow it to the affected word.
1683      If the region spans two words, defer to extract_split_bit_field.  */
1684   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1685     {
1686       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1687                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1688       bitnum %= BITS_PER_WORD;
1689       if (bitnum + bitsize > BITS_PER_WORD)
1690         {
1691           if (!fallback_p)
1692             return NULL_RTX;
1693           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1694           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1695         }
1696     }
1697
1698   /* From here on we know the desired field is smaller than a word.
1699      If OP0 is a register, it too fits within a word.  */
1700   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1701   extraction_insn extv;
1702   if (!MEM_P (op0)
1703       /* ??? We could limit the structure size to the part of OP0 that
1704          contains the field, with appropriate checks for endianness
1705          and TRULY_NOOP_TRUNCATION.  */
1706       && get_best_reg_extraction_insn (&extv, pattern,
1707                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1708                                        tmode))
1709     {
1710       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1711                                                  unsignedp, target, mode,
1712                                                  tmode);
1713       if (result)
1714         return result;
1715     }
1716
1717   /* If OP0 is a memory, try copying it to a register and seeing if a
1718      cheap register alternative is available.  */
1719   if (MEM_P (op0))
1720     {
1721       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1722                                         tmode))
1723         {
1724           rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1725                                                      bitnum, unsignedp,
1726                                                      target, mode,
1727                                                      tmode);
1728           if (result)
1729             return result;
1730         }
1731
1732       rtx_insn *last = get_last_insn ();
1733
1734       /* Try loading part of OP0 into a register and extracting the
1735          bitfield from that.  */
1736       unsigned HOST_WIDE_INT bitpos;
1737       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1738                                                0, 0, tmode, &bitpos);
1739       if (xop0)
1740         {
1741           xop0 = copy_to_reg (xop0);
1742           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1743                                             unsignedp, target,
1744                                             mode, tmode, false);
1745           if (result)
1746             return result;
1747           delete_insns_since (last);
1748         }
1749     }
1750
1751   if (!fallback_p)
1752     return NULL;
1753
1754   /* Find a correspondingly-sized integer field, so we can apply
1755      shifts and masks to it.  */
1756   int_mode = int_mode_for_mode (tmode);
1757   if (int_mode == BLKmode)
1758     int_mode = int_mode_for_mode (mode);
1759   /* Should probably push op0 out to memory and then do a load.  */
1760   gcc_assert (int_mode != BLKmode);
1761
1762   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1763                                     target, unsignedp);
1764   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1765 }
1766
1767 /* Generate code to extract a bit-field from STR_RTX
1768    containing BITSIZE bits, starting at BITNUM,
1769    and put it in TARGET if possible (if TARGET is nonzero).
1770    Regardless of TARGET, we return the rtx for where the value is placed.
1771
1772    STR_RTX is the structure containing the field (a REG or MEM).
1773    UNSIGNEDP is nonzero if this is an unsigned bit field.
1774    MODE is the natural mode of the field value once extracted.
1775    TMODE is the mode the caller would like the value to have;
1776    but the value may be returned with type MODE instead.
1777
1778    If a TARGET is specified and we can store in it at no extra cost,
1779    we do so, and return TARGET.
1780    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1781    if they are equally easy.  */
1782
1783 rtx
1784 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1785                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1786                    machine_mode mode, machine_mode tmode)
1787 {
1788   machine_mode mode1;
1789
1790   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
       /* MODE1 is the mode handed to strict_volatile_bitfield_p: the mode of
          STR_RTX if that has a nonzero bitsize, otherwise the mode of TARGET
          (when TARGET is given and its mode has a nonzero bitsize), otherwise
          TMODE.  */
1791   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1792     mode1 = GET_MODE (str_rtx);
1793   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1794     mode1 = GET_MODE (target);
1795   else
1796     mode1 = tmode;
1797
1798   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1799     {
1800       /* Extraction of a full MODE1 value can be done with a simple load.
1801          We know here that the field can be accessed with one single
1802          instruction.  For targets that support unaligned memory,
1803          an unaligned access may be necessary.  */
1804       if (bitsize == GET_MODE_BITSIZE (mode1))
1805         {
                   /* BITNUM is turned into a byte offset here, so the assert
                      below insists that it is a whole number of bytes.  */
1806           rtx result = adjust_bitfield_address (str_rtx, mode1,
1807                                                 bitnum / BITS_PER_UNIT);
1808           gcc_assert (bitnum % BITS_PER_UNIT == 0);
1809           return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1810         }
1811
               /* BITNUM is passed both by value and by address, so after this
                  call it holds whatever position narrow_bit_field_mem stored
                  back; the assert checks that the field then lies entirely
                  within one MODE1-sized unit.  The narrowed reference is
                  copied into a register before the generic extraction.  */
1812       str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1813                                       &bitnum);
1814       gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1815       str_rtx = copy_to_reg (str_rtx);
1816     }
1817
       /* Everything else is done by extract_bit_field_1; the final argument
          is passed as true (other calls in this file pass FALLBACK_P or
          false in that position).  */
1818   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1819                               target, mode, tmode, true);
1820 }
1821 \f
1822 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1823    from bit BITNUM of OP0.
1824
1825    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1826    If TARGET is nonzero, attempts to store the value there
1827    and return TARGET, but this is not guaranteed.
1828    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1829
1830 static rtx
1831 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1832                          unsigned HOST_WIDE_INT bitsize,
1833                          unsigned HOST_WIDE_INT bitnum, rtx target,
1834                          int unsignedp)
1835 {
1836   /* Anything that is not a MEM is extracted as it stands.  */
1837   if (!MEM_P (op0))
1838     return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1839                                       target, unsignedp);
1840
1841   machine_mode access_mode
1842     = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1843                      MEM_VOLATILE_P (op0));
1844   /* VOIDmode should only occur when the field spans word boundaries.  */
1845   if (access_mode == VOIDmode)
1846     return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1847
1848   /* Narrow OP0 to ACCESS_MODE; BITNUM is passed by address for adjustment.  */
1849   op0 = narrow_bit_field_mem (op0, access_mode, bitsize, bitnum, &bitnum);
1850   return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1851                                     target, unsignedp);
1852 }
1853
1854 /* Helper function for extract_fixed_bit_field, extracts
1855    the bit field always using the MODE of OP0.  */
1856
1857 static rtx
1858 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1859                            unsigned HOST_WIDE_INT bitsize,
1860                            unsigned HOST_WIDE_INT bitnum, rtx target,
1861                            int unsignedp)
1862 {
       /* MODE starts out as the mode OP0 already has, which must be a scalar
          integer mode.  Note that the signed path further down reuses MODE
          as a loop variable.  */
1863   machine_mode mode = GET_MODE (op0);
1864   gcc_assert (SCALAR_INT_MODE_P (mode));
1865
1866   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1867      for invalid input, such as extract equivalent of f5 from
1868      gcc.dg/pr48335-2.c.  */
1869
1870   if (BYTES_BIG_ENDIAN)
1871     /* BITNUM is the distance between our msb and that of OP0.
1872        Convert it to the distance from the lsb.  */
1873     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1874
1875   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1876      We have reduced the big-endian case to the little-endian case.  */
1877
       /* Unsigned field: shift right by BITNUM, convert to TMODE, then AND
          away any bits above the field.  */
1878   if (unsignedp)
1879     {
1880       if (bitnum)
1881         {
1882           /* If the field does not already start at the lsb,
1883              shift it so it does.  */
1884           /* Maybe propagate the target for the shift.  */
1885           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
                   /* The shift is done in MODE, so TARGET is only offered as
                      its destination when TMODE is that same mode.  */
1886           if (tmode != mode)
1887             subtarget = 0;
1888           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1889         }
1890       /* Convert the value to the desired mode.  */
1891       if (mode != tmode)
1892         op0 = convert_to_mode (tmode, op0, 1);
1893
1894       /* Unless the msb of the field used to be the msb when we shifted,
1895          mask out the upper bits.  */
1896
               /* The test uses MODE, the mode OP0 had before any conversion,
                  while the AND itself is done in OP0's current mode.
                  mask_rtx presumably yields a mask of the low BITSIZE bits
                  -- confirm against its definition.  */
1897       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1898         return expand_binop (GET_MODE (op0), and_optab, op0,
1899                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1900                              target, 1, OPTAB_LIB_WIDEN);
1901       return op0;
1902     }
1903
1904   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1905      then arithmetic-shift its lsb to the lsb of the word.  */
1906   op0 = force_reg (mode, op0);
1907
1908   /* Find the narrowest integer mode that contains the field.  */
1909
       /* MODE is overwritten by this search: afterwards it names the mode
          found (to which OP0 has been converted), or VOIDmode if the loop ran
          off the end without finding one.  */
1910   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1911        mode = GET_MODE_WIDER_MODE (mode))
1912     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1913       {
1914         op0 = convert_to_mode (mode, op0, 0);
1915         break;
1916       }
1917
       /* The shifts below operate in MODE, so TARGET is dropped unless the
          caller wants the result in that very mode.  */
1918   if (mode != tmode)
1919     target = 0;
1920
       /* Left shift: skipped when the field's msb is already the msb of
          MODE.  AMOUNT is the number of bits of MODE above the field.  */
1921   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1922     {
1923       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1924       /* Maybe propagate the target for the shift.  */
1925       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1926       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1927     }
1928
       /* Right shift by the width of MODE minus BITSIZE, which brings the
          field's lsb down to bit 0.  The last argument is 0 here, unlike the
          1 passed to the shifts above; per the comment before force_reg
          this is the arithmetic shift.  */
1929   return expand_shift (RSHIFT_EXPR, mode, op0,
1930                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1931 }
1932
1933 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1934    VALUE << BITPOS.  */
1935
1936 static rtx
1937 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
1938               int bitpos)
1939 {
       /* The shift is performed by wi::lshift rather than by a host `<<'
          expression; its result is handed to immed_wide_int_const together
          with MODE to obtain the rtx constant that is returned.  */
1940   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
1941 }
1942 \f
1943 /* Extract a bit field that is split across two words
1944    and return an RTX for the result.
1945
1946    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1947    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1948    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1949
1950 static rtx
1951 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1952                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1953 {
       /* UNIT is the size in bits of the chunks OP0 is read in, BITSDONE the
          number of field bits extracted so far, RESULT the value assembled so
          far, and FIRST is nonzero until the first part has been stored.  */
1954   unsigned int unit;
1955   unsigned int bitsdone = 0;
1956   rtx result = NULL_RTX;
1957   int first = 1;
1958
1959   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1960      much at a time.  */
1961   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1962     unit = BITS_PER_WORD;
1963   else
1964     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1965
       /* Each iteration extracts one piece that lies inside a single UNIT,
          shifts it to its position in the final value and ORs it into
          RESULT.  */
1966   while (bitsdone < bitsize)
1967     {
1968       unsigned HOST_WIDE_INT thissize;
1969       rtx part, word;
1970       unsigned HOST_WIDE_INT thispos;
1971       unsigned HOST_WIDE_INT offset;
1972
               /* Split the position of the next unread bit into a UNIT index
                  (OFFSET) and a bit position inside that UNIT (THISPOS).  */
1973       offset = (bitpos + bitsdone) / unit;
1974       thispos = (bitpos + bitsdone) % unit;
1975
1976       /* THISSIZE must not overrun a word boundary.  Otherwise,
1977          extract_fixed_bit_field will call us again, and we will mutually
1978          recurse forever.  */
1979       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1980       thissize = MIN (thissize, unit - thispos);
1981
1982       /* If OP0 is a register, then handle OFFSET here.
1983
1984          When handling multiword bitfields, extract_bit_field may pass
1985          down a word_mode SUBREG of a larger REG for a bitfield that actually
1986          crosses a word boundary.  Thus, for a SUBREG, we must find
1987          the current word starting from the base register.  */
1988       if (GET_CODE (op0) == SUBREG)
1989         {
1990           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1991           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1992                                         GET_MODE (SUBREG_REG (op0)));
1993           offset = 0;
1994         }
1995       else if (REG_P (op0))
1996         {
1997           word = operand_subword_force (op0, offset, GET_MODE (op0));
1998           offset = 0;
1999         }
2000       else
2001         word = op0;
2002
2003       /* Extract the parts in bit-counting order,
2004          whose meaning is determined by BYTES_BIG_ENDIAN.
2005          OFFSET is in UNITs, and UNIT is in bits.  */
               /* The two trailing arguments are TARGET (none) and UNSIGNEDP
                  (1): every part is extracted unsigned so that the parts can
                  simply be ORed together.  */
2006       part = extract_fixed_bit_field (word_mode, word, thissize,
2007                                       offset * unit + thispos, 0, 1);
2008       bitsdone += thissize;
2009
2010       /* Shift this part into place for the result.  */
               /* BITSDONE already includes this part.  With BYTES_BIG_ENDIAN
                  the part is shifted left by the number of field bits still
                  to come; otherwise by the number of bits extracted before
                  it.  A shift by zero is skipped in both cases.  */
2011       if (BYTES_BIG_ENDIAN)
2012         {
2013           if (bitsize != bitsdone)
2014             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2015                                  bitsize - bitsdone, 0, 1);
2016         }
2017       else
2018         {
2019           if (bitsdone != thissize)
2020             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2021                                  bitsdone - thissize, 0, 1);
2022         }
2023
2024       if (first)
2025         result = part;
2026       else
2027         /* Combine the parts with bitwise or.  This works
2028            because we extracted each part as an unsigned bit field.  */
2029         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2030                                OPTAB_LIB_WIDEN);
2031
2032       first = 0;
2033     }
2034
2035   /* Unsigned bit field: we are done.  */
2036   if (unsignedp)
2037     return result;
2038   /* Signed bit field: sign-extend with two arithmetic shifts.  */
       /* RESULT is a word_mode value, so both shifts are by the number of
          bits of a word that lie above the field.  */
2039   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2040                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2041   return expand_shift (RSHIFT_EXPR, word_mode, result,
2042                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2043 }
2044 \f
2045 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2046    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2047    MODE, fill the upper bits with zeros.  Fail if the layout of either
2048    mode is unknown (as for CC modes) or if the extraction would involve
2049    unprofitable mode punning.  Return the value on success, otherwise
2050    return null.
2051
2052    This is different from gen_lowpart* in these respects:
2053
2054      - the returned value must always be considered an rvalue
2055
2056      - when MODE is wider than SRC_MODE, the extraction involves
2057        a zero extension
2058
2059      - when MODE is smaller than SRC_MODE, the extraction involves
2060        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2061
2062    In other words, this routine performs a computation, whereas the
2063    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2064    operations.  */
2065
2066 rtx
2067 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2068 {
2069   machine_mode int_mode, src_int_mode;
2070
       /* Nothing to do when the modes already agree.  */
2071   if (mode == src_mode)
2072     return src;
2073
2074   if (CONSTANT_P (src))
2075     {
2076       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2077          fails, it will happily create (subreg (symbol_ref)) or similar
2078          invalid SUBREGs.  */
2079       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2080       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2081       if (ret)
2082         return ret;
2083
               /* Give up on a constant that carries no mode of its own, or
                  when validate_subreg rejects the lowpart subreg; otherwise
                  load the constant into a register and take the subreg of
                  that register.  */
2084       if (GET_MODE (src) == VOIDmode
2085           || !validate_subreg (mode, src_mode, src, byte))
2086         return NULL_RTX;
2087
2088       src = force_reg (GET_MODE (src), src);
2089       return gen_rtx_SUBREG (mode, src, byte);
2090     }
2091
       /* Fail if either mode is a condition-code mode.  */
2092   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2093     return NULL_RTX;
2094
       /* Same width and tieable: try a plain lowpart first, falling through
          to the integer-mode route if gen_lowpart_common returns null.  */
2095   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2096       && MODES_TIEABLE_P (mode, src_mode))
2097     {
2098       rtx x = gen_lowpart_common (mode, src);
2099       if (x)
2100         return x;
2101     }
2102
       /* Both modes need an integer counterpart; BLKmode is treated as
          "none available" (presumably what int_mode_for_mode returns in that
          case -- confirm against its definition).  */
2103   src_int_mode = int_mode_for_mode (src_mode);
2104   int_mode = int_mode_for_mode (mode);
2105   if (src_int_mode == BLKmode || int_mode == BLKmode)
2106     return NULL_RTX;
2107
       /* Punning between a mode and its integer counterpart is only done when
          the target says the two are tieable.  */
2108   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2109     return NULL_RTX;
2110   if (!MODES_TIEABLE_P (int_mode, mode))
2111     return NULL_RTX;
2112
       /* View SRC in its integer mode, convert that integer to INT_MODE with
          the unsigned flag set (the final `true'), then view the result in
          MODE.  */
2113   src = gen_lowpart (src_int_mode, src);
2114   src = convert_modes (int_mode, src_int_mode, src, true);
2115   src = gen_lowpart (mode, src);
2116   return src;
2117 }
2118 \f
2119 /* Add INC into TARGET.  */
2120
2121 void
2122 expand_inc (rtx target, rtx inc)
2123 {
2124   /* Request the sum in TARGET; copy it there if it landed elsewhere.  */
2125   rtx sum = expand_binop (GET_MODE (target), add_optab, target, inc, target,
2126                           0, OPTAB_LIB_WIDEN);
2127   if (sum != target)
2128     emit_move_insn (target, sum);
2129 }
2130
2131 /* Subtract DEC from TARGET.  */
2132
2133 void
2134 expand_dec (rtx target, rtx dec)
2135 {
2136   /* Request the difference in TARGET; copy it there if it landed elsewhere.  */
2137   rtx diff = expand_binop (GET_MODE (target), sub_optab, target, dec, target,
2138                            0, OPTAB_LIB_WIDEN);
2139   if (diff != target)
2140     emit_move_insn (target, diff);
2141 }
2142 \f
2143 /* Output a shift instruction for expression code CODE,
2144    with SHIFTED being the rtx for the value to shift,
2145    and AMOUNT the rtx for the amount to shift by.
2146    Store the result in the rtx TARGET, if that is convenient.
2147    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2148    Return the rtx for where the value is.  */
2149
2150 static rtx
2151 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2152                 rtx amount, rtx target, int unsignedp)
2153 {
2154   rtx op1, temp = 0;
2155   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2156   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2157   optab lshift_optab = ashl_optab;
2158   optab rshift_arith_optab = ashr_optab;
2159   optab rshift_uns_optab = lshr_optab;
2160   optab lrotate_optab = rotl_optab;
2161   optab rrotate_optab = rotr_optab;
2162   machine_mode op1_mode;
2163   machine_mode scalar_mode = mode;
2164   int attempt;
2165   bool speed = optimize_insn_for_speed_p ();
2166
2167   if (VECTOR_MODE_P (mode))
2168     scalar_mode = GET_MODE_INNER (mode);
2169   op1 = amount;
2170   op1_mode = GET_MODE (op1);
2171
2172   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2173      shift amount is a vector, use the vector/vector shift patterns.  */
2174   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2175     {
2176       lshift_optab = vashl_optab;
2177       rshift_arith_optab = vashr_optab;
2178       rshift_uns_optab = vlshr_optab;
2179       lrotate_optab = vrotl_optab;
2180       rrotate_optab = vrotr_optab;
2181     }
2182
2183   /* Previously detected shift-counts computed by NEGATE_EXPR
2184      and shifted in the other direction; but that does not work
2185      on all machines.  */
2186
2187   if (SHIFT_COUNT_TRUNCATED)
2188     {
2189       if (CONST_INT_P (op1)
2190           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2191               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2192         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2193                        % GET_MODE_BITSIZE (scalar_mode));
2194       else if (GET_CODE (op1) == SUBREG
2195                && subreg_lowpart_p (op1)
2196                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2197                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2198         op1 = SUBREG_REG (op1);
2199     }
2200
2201   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2202      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2203      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2204      amount instead.  */
2205   if (rotate
2206       && CONST_INT_P (op1)
2207       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2208                    GET_MODE_BITSIZE (scalar_mode) - 1))
2209     {
2210       op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2211       left = !left;
2212       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2213     }
2214
2215   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2216      Note that this is not the case for bigger values.  For instance a rotation
2217      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2218      0x04030201 (bswapsi).  */
2219   if (rotate
2220       && CONST_INT_P (op1)
2221       && INTVAL (op1) == BITS_PER_UNIT
2222       && GET_MODE_SIZE (scalar_mode) == 2
2223       && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2224     return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2225                                   unsignedp);
2226
2227   if (op1 == const0_rtx)
2228     return shifted;
2229
2230   /* Check whether its cheaper to implement a left shift by a constant
2231      bit count by a sequence of additions.  */
2232   if (code == LSHIFT_EXPR
2233       && CONST_INT_P (op1)
2234       && INTVAL (op1) > 0
2235       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2236       && INTVAL (op1) < MAX_BITS_PER_WORD
2237       && (shift_cost (speed, mode, INTVAL (op1))
2238           > INTVAL (op1) * add_cost (speed, mode))
2239       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2240     {
2241       int i;
2242       for (i = 0; i < INTVAL (op1); i++)
2243         {
2244           temp = force_reg (mode, shifted);
2245           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2246                                   unsignedp, OPTAB_LIB_WIDEN);
2247         }
2248       return shifted;
2249     }
2250
2251   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2252     {
2253       enum optab_methods methods;
2254
2255       if (attempt == 0)
2256         methods = OPTAB_DIRECT;
2257       else if (attempt == 1)
2258         methods = OPTAB_WIDEN;
2259       else
2260         methods = OPTAB_LIB_WIDEN;
2261
2262       if (rotate)
2263         {
2264           /* Widening does not work for rotation.  */
2265           if (methods == OPTAB_WIDEN)
2266             continue;
2267           else if (methods == OPTAB_LIB_WIDEN)
2268             {
2269               /* If we have been unable to open-code this by a rotation,
2270                  do it as the IOR of two shifts.  I.e., to rotate A
2271                  by N bits, compute
2272                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2273                  where C is the bitsize of A.
2274
2275                  It is theoretically possible that the target machine might
2276                  not be able to perform either shift and hence we would
2277                  be making two libcalls rather than just the one for the
2278                  shift (similarly if IOR could not be done).  We will allow
2279                  this extremely unlikely lossage to avoid complicating the
2280                  code below.  */
2281
2282               rtx subtarget = target == shifted ? 0 : target;
2283               rtx new_amount, other_amount;
2284               rtx temp1;
2285
2286               new_amount = op1;
2287               if (op1 == const0_rtx)
2288                 return shifted;
2289               else if (CONST_INT_P (op1))
2290                 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2291                                         - INTVAL (op1));
2292               else
2293                 {
2294                   other_amount
2295                     = simplify_gen_unary (NEG, GET_MODE (op1),
2296                                           op1, GET_MODE (op1));
2297                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2298                   other_amount
2299                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2300                                            gen_int_mode (mask, GET_MODE (op1)));
2301                 }
2302
2303               shifted = force_reg (mode, shifted);
2304
2305               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2306                                      mode, shifted, new_amount, 0, 1);
2307               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2308                                       mode, shifted, other_amount,
2309                                       subtarget, 1);
2310               return expand_binop (mode, ior_optab, temp, temp1, target,
2311                                    unsignedp, methods);
2312             }
2313
2314           temp = expand_binop (mode,
2315                                left ? lrotate_optab : rrotate_optab,
2316                                shifted, op1, target, unsignedp, methods);
2317         }
2318       else if (unsignedp)
2319         temp = expand_binop (mode,
2320                              left ? lshift_optab : rshift_uns_optab,
2321                              shifted, op1, target, unsignedp, methods);
2322
2323       /* Do arithmetic shifts.
2324          Also, if we are going to widen the operand, we can just as well
2325          use an arithmetic right-shift instead of a logical one.  */
2326       if (temp == 0 && ! rotate
2327           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2328         {
2329           enum optab_methods methods1 = methods;
2330
2331           /* If trying to widen a log shift to an arithmetic shift,
2332              don't accept an arithmetic shift of the same size.  */
2333           if (unsignedp)
2334             methods1 = OPTAB_MUST_WIDEN;
2335
2336           /* Arithmetic shift */
2337
2338           temp = expand_binop (mode,
2339                                left ? lshift_optab : rshift_arith_optab,
2340                                shifted, op1, target, unsignedp, methods1);
2341         }
2342
2343       /* We used to try extzv here for logical right shifts, but that was
2344          only useful for one machine, the VAX, and caused poor code
2345          generation there for lshrdi3, so the code was deleted and a
2346          define_expand for lshrsi3 was added to vax.md.  */
2347     }
2348
2349   gcc_assert (temp);
2350   return temp;
2351 }
2352
2353 /* Output a shift instruction for expression code CODE,
2354    with SHIFTED being the rtx for the value to shift,
2355    and AMOUNT the amount to shift by.
2356    Store the result in the rtx TARGET, if that is convenient.
2357    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2358    Return the rtx for where the value is.  */
2359
2360 rtx
2361 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2362               int amount, rtx target, int unsignedp)
2363 {
2364   return expand_shift_1 (code, mode,
2365                          shifted, GEN_INT (amount), target, unsignedp);
2366 }
2367
2368 /* Output a shift instruction for expression code CODE,
2369    with SHIFTED being the rtx for the value to shift,
2370    and AMOUNT the tree for the amount to shift by.
2371    Store the result in the rtx TARGET, if that is convenient.
2372    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2373    Return the rtx for where the value is.  */
2374
2375 rtx
2376 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2377                        tree amount, rtx target, int unsignedp)
2378 {
2379   return expand_shift_1 (code, mode,
2380                          shifted, expand_normal (amount), target, unsignedp);
2381 }
2382
2383 \f
2384 /* Indicates the type of fixup needed after a constant multiplication.
2385    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2386    the result should be negated, and ADD_VARIANT means that the
2387    multiplicand should be added to the result.  */
2388 enum mult_variant {basic_variant, negate_variant, add_variant};
2389
2390 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2391                         const struct mult_cost *, machine_mode mode);
2392 static bool choose_mult_variant (machine_mode, HOST_WIDE_INT,
2393                                  struct algorithm *, enum mult_variant *, int);
2394 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2395                               const struct algorithm *, enum mult_variant);
2396 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2397 static rtx extract_high_half (machine_mode, rtx);
2398 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2399 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2400                                        int, int);
2401 /* Compute and return the best algorithm for multiplying by T.
2402    The algorithm must cost less than cost_limit
2403    If retval.cost >= COST_LIMIT, no algorithm was found and all
2404    other field of the returned struct are undefined.
2405    MODE is the machine mode of the multiplication.  */
2406
2407 static void
2408 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2409             const struct mult_cost *cost_limit, machine_mode mode)
2410 {
2411   int m;
2412   struct algorithm *alg_in, *best_alg;
2413   struct mult_cost best_cost;
2414   struct mult_cost new_limit;
2415   int op_cost, op_latency;
2416   unsigned HOST_WIDE_INT orig_t = t;
2417   unsigned HOST_WIDE_INT q;
2418   int maxm, hash_index;
2419   bool cache_hit = false;
2420   enum alg_code cache_alg = alg_zero;
2421   bool speed = optimize_insn_for_speed_p ();
2422   machine_mode imode;
2423   struct alg_hash_entry *entry_ptr;
2424
2425   /* Indicate that no algorithm is yet found.  If no algorithm
2426      is found, this value will be returned and indicate failure.  */
2427   alg_out->cost.cost = cost_limit->cost + 1;
2428   alg_out->cost.latency = cost_limit->latency + 1;
2429
2430   if (cost_limit->cost < 0
2431       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2432     return;
2433
2434   /* Be prepared for vector modes.  */
2435   imode = GET_MODE_INNER (mode);
2436   if (imode == VOIDmode)
2437     imode = mode;
2438
2439   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2440
2441   /* Restrict the bits of "t" to the multiplication's mode.  */
2442   t &= GET_MODE_MASK (imode);
2443
2444   /* t == 1 can be done in zero cost.  */
2445   if (t == 1)
2446     {
2447       alg_out->ops = 1;
2448       alg_out->cost.cost = 0;
2449       alg_out->cost.latency = 0;
2450       alg_out->op[0] = alg_m;
2451       return;
2452     }
2453
2454   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2455      fail now.  */
2456   if (t == 0)
2457     {
2458       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2459         return;
2460       else
2461         {
2462           alg_out->ops = 1;
2463           alg_out->cost.cost = zero_cost (speed);
2464           alg_out->cost.latency = zero_cost (speed);
2465           alg_out->op[0] = alg_zero;
2466           return;
2467         }
2468     }
2469
2470   /* We'll be needing a couple extra algorithm structures now.  */
2471
2472   alg_in = XALLOCA (struct algorithm);
2473   best_alg = XALLOCA (struct algorithm);
2474   best_cost = *cost_limit;
2475
2476   /* Compute the hash index.  */
2477   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2478
2479   /* See if we already know what to do for T.  */
2480   entry_ptr = alg_hash_entry_ptr (hash_index);
2481   if (entry_ptr->t == t
2482       && entry_ptr->mode == mode
2483       && entry_ptr->mode == mode
2484       && entry_ptr->speed == speed
2485       && entry_ptr->alg != alg_unknown)
2486     {
2487       cache_alg = entry_ptr->alg;
2488
2489       if (cache_alg == alg_impossible)
2490         {
2491           /* The cache tells us that it's impossible to synthesize
2492              multiplication by T within entry_ptr->cost.  */
2493           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2494             /* COST_LIMIT is at least as restrictive as the one
2495                recorded in the hash table, in which case we have no
2496                hope of synthesizing a multiplication.  Just
2497                return.  */
2498             return;
2499
2500           /* If we get here, COST_LIMIT is less restrictive than the
2501              one recorded in the hash table, so we may be able to
2502              synthesize a multiplication.  Proceed as if we didn't
2503              have the cache entry.  */
2504         }
2505       else
2506         {
2507           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2508             /* The cached algorithm shows that this multiplication
2509                requires more cost than COST_LIMIT.  Just return.  This
2510                way, we don't clobber this cache entry with
2511                alg_impossible but retain useful information.  */
2512             return;
2513
2514           cache_hit = true;
2515
2516           switch (cache_alg)
2517             {
2518             case alg_shift:
2519               goto do_alg_shift;
2520
2521             case alg_add_t_m2:
2522             case alg_sub_t_m2:
2523               goto do_alg_addsub_t_m2;
2524
2525             case alg_add_factor:
2526             case alg_sub_factor:
2527               goto do_alg_addsub_factor;
2528
2529             case alg_add_t2_m:
2530               goto do_alg_add_t2_m;
2531
2532             case alg_sub_t2_m:
2533               goto do_alg_sub_t2_m;
2534
2535             default:
2536               gcc_unreachable ();
2537             }
2538         }
2539     }
2540
2541   /* If we have a group of zero bits at the low-order part of T, try
2542      multiplying by the remaining bits and then doing a shift.  */
2543
2544   if ((t & 1) == 0)
2545     {
2546     do_alg_shift:
2547       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2548       if (m < maxm)
2549         {
2550           q = t >> m;
2551           /* The function expand_shift will choose between a shift and
2552              a sequence of additions, so the observed cost is given as
2553              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2554           op_cost = m * add_cost (speed, mode);
2555           if (shift_cost (speed, mode, m) < op_cost)
2556             op_cost = shift_cost (speed, mode, m);
2557           new_limit.cost = best_cost.cost - op_cost;
2558           new_limit.latency = best_cost.latency - op_cost;
2559           synth_mult (alg_in, q, &new_limit, mode);
2560
2561           alg_in->cost.cost += op_cost;
2562           alg_in->cost.latency += op_cost;
2563           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2564             {
2565               best_cost = alg_in->cost;
2566               std::swap (alg_in, best_alg);
2567               best_alg->log[best_alg->ops] = m;
2568               best_alg->op[best_alg->ops] = alg_shift;
2569             }
2570
2571           /* See if treating ORIG_T as a signed number yields a better
2572              sequence.  Try this sequence only for a negative ORIG_T
2573              as it would be useless for a non-negative ORIG_T.  */
2574           if ((HOST_WIDE_INT) orig_t < 0)
2575             {
2576               /* Shift ORIG_T as follows because a right shift of a
2577                  negative-valued signed type is implementation
2578                  defined.  */
2579               q = ~(~orig_t >> m);
2580               /* The function expand_shift will choose between a shift
2581                  and a sequence of additions, so the observed cost is
2582                  given as MIN (m * add_cost(speed, mode),
2583                  shift_cost(speed, mode, m)).  */
2584               op_cost = m * add_cost (speed, mode);
2585               if (shift_cost (speed, mode, m) < op_cost)
2586                 op_cost = shift_cost (speed, mode, m);
2587               new_limit.cost = best_cost.cost - op_cost;
2588               new_limit.latency = best_cost.latency - op_cost;
2589               synth_mult (alg_in, q, &new_limit, mode);
2590
2591               alg_in->cost.cost += op_cost;
2592               alg_in->cost.latency += op_cost;
2593               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2594                 {
2595                   best_cost = alg_in->cost;
2596                   std::swap (alg_in, best_alg);
2597                   best_alg->log[best_alg->ops] = m;
2598                   best_alg->op[best_alg->ops] = alg_shift;
2599                 }
2600             }
2601         }
2602       if (cache_hit)
2603         goto done;
2604     }
2605
2606   /* If we have an odd number, add or subtract one.  */
2607   if ((t & 1) != 0)
2608     {
2609       unsigned HOST_WIDE_INT w;
2610
2611     do_alg_addsub_t_m2:
2612       for (w = 1; (w & t) != 0; w <<= 1)
2613         ;
2614       /* If T was -1, then W will be zero after the loop.  This is another
2615          case where T ends with ...111.  Handling this with (T + 1) and
2616          subtract 1 produces slightly better code and results in algorithm
2617          selection much faster than treating it like the ...0111 case
2618          below.  */
2619       if (w == 0
2620           || (w > 2
2621               /* Reject the case where t is 3.
2622                  Thus we prefer addition in that case.  */
2623               && t != 3))
2624         {
2625           /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */
2626
2627           op_cost = add_cost (speed, mode);
2628           new_limit.cost = best_cost.cost - op_cost;
2629           new_limit.latency = best_cost.latency - op_cost;
2630           synth_mult (alg_in, t + 1, &new_limit, mode);
2631
2632           alg_in->cost.cost += op_cost;
2633           alg_in->cost.latency += op_cost;
2634           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2635             {
2636               best_cost = alg_in->cost;
2637               std::swap (alg_in, best_alg);
2638               best_alg->log[best_alg->ops] = 0;
2639               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2640             }
2641         }
2642       else
2643         {
2644           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */
2645
2646           op_cost = add_cost (speed, mode);
2647           new_limit.cost = best_cost.cost - op_cost;
2648           new_limit.latency = best_cost.latency - op_cost;
2649           synth_mult (alg_in, t - 1, &new_limit, mode);
2650
2651           alg_in->cost.cost += op_cost;
2652           alg_in->cost.latency += op_cost;
2653           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2654             {
2655               best_cost = alg_in->cost;
2656               std::swap (alg_in, best_alg);
2657               best_alg->log[best_alg->ops] = 0;
2658               best_alg->op[best_alg->ops] = alg_add_t_m2;
2659             }
2660         }
2661
2662       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2663          quickly with a - a * n for some appropriate constant n.  */
2664       m = exact_log2 (-orig_t + 1);
2665       if (m >= 0 && m < maxm)
2666         {
2667           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2668           /* If the target has a cheap shift-and-subtract insn use
2669              that in preference to a shift insn followed by a sub insn.
2670              Assume that the shift-and-sub is "atomic" with a latency
2671              equal to it's cost, otherwise assume that on superscalar
2672              hardware the shift may be executed concurrently with the
2673              earlier steps in the algorithm.  */
2674           if (shiftsub1_cost (speed, mode, m) <= op_cost)
2675             {
2676               op_cost = shiftsub1_cost (speed, mode, m);
2677               op_latency = op_cost;
2678             }
2679           else
2680             op_latency = add_cost (speed, mode);
2681
2682           new_limit.cost = best_cost.cost - op_cost;
2683           new_limit.latency = best_cost.latency - op_latency;
2684           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2685                       &new_limit, mode);
2686
2687           alg_in->cost.cost += op_cost;
2688           alg_in->cost.latency += op_latency;
2689           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2690             {
2691               best_cost = alg_in->cost;
2692               std::swap (alg_in, best_alg);
2693               best_alg->log[best_alg->ops] = m;
2694               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2695             }
2696         }
2697
2698       if (cache_hit)
2699         goto done;
2700     }
2701
2702   /* Look for factors of t of the form
2703      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2704      If we find such a factor, we can multiply by t using an algorithm that
2705      multiplies by q, shift the result by m and add/subtract it to itself.
2706
2707      We search for large factors first and loop down, even if large factors
2708      are less probable than small; if we find a large factor we will find a
2709      good sequence quickly, and therefore be able to prune (by decreasing
2710      COST_LIMIT) the search.  */
2711
2712  do_alg_addsub_factor:
2713   for (m = floor_log2 (t - 1); m >= 2; m--)
2714     {
2715       unsigned HOST_WIDE_INT d;
2716
2717       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2718       if (t % d == 0 && t > d && m < maxm
2719           && (!cache_hit || cache_alg == alg_add_factor))
2720         {
2721           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2722           if (shiftadd_cost (speed, mode, m) <= op_cost)
2723             op_cost = shiftadd_cost (speed, mode, m);
2724
2725           op_latency = op_cost;
2726
2727
2728           new_limit.cost = best_cost.cost - op_cost;
2729           new_limit.latency = best_cost.latency - op_latency;
2730           synth_mult (alg_in, t / d, &new_limit, mode);
2731
2732           alg_in->cost.cost += op_cost;
2733           alg_in->cost.latency += op_latency;
2734           if (alg_in->cost.latency < op_cost)
2735             alg_in->cost.latency = op_cost;
2736           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2737             {
2738               best_cost = alg_in->cost;
2739               std::swap (alg_in, best_alg);
2740               best_alg->log[best_alg->ops] = m;
2741               best_alg->op[best_alg->ops] = alg_add_factor;
2742             }
2743           /* Other factors will have been taken care of in the recursion.  */
2744           break;
2745         }
2746
2747       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2748       if (t % d == 0 && t > d && m < maxm
2749           && (!cache_hit || cache_alg == alg_sub_factor))
2750         {
2751           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2752           if (shiftsub0_cost (speed, mode, m) <= op_cost)
2753             op_cost = shiftsub0_cost (speed, mode, m);
2754
2755           op_latency = op_cost;
2756
2757           new_limit.cost = best_cost.cost - op_cost;
2758           new_limit.latency = best_cost.latency - op_latency;
2759           synth_mult (alg_in, t / d, &new_limit, mode);
2760
2761           alg_in->cost.cost += op_cost;
2762           alg_in->cost.latency += op_latency;
2763           if (alg_in->cost.latency < op_cost)
2764             alg_in->cost.latency = op_cost;
2765           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2766             {
2767               best_cost = alg_in->cost;
2768               std::swap (alg_in, best_alg);
2769               best_alg->log[best_alg->ops] = m;
2770               best_alg->op[best_alg->ops] = alg_sub_factor;
2771             }
2772           break;
2773         }
2774     }
2775   if (cache_hit)
2776     goto done;
2777
2778   /* Try shift-and-add (load effective address) instructions,
2779      i.e. do a*3, a*5, a*9.  */
2780   if ((t & 1) != 0)
2781     {
2782     do_alg_add_t2_m:
2783       q = t - 1;
2784       q = q & -q;
2785       m = exact_log2 (q);
2786       if (m >= 0 && m < maxm)
2787         {
2788           op_cost = shiftadd_cost (speed, mode, m);
2789           new_limit.cost = best_cost.cost - op_cost;
2790           new_limit.latency = best_cost.latency - op_cost;
2791           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2792
2793           alg_in->cost.cost += op_cost;
2794           alg_in->cost.latency += op_cost;
2795           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2796             {
2797               best_cost = alg_in->cost;
2798               std::swap (alg_in, best_alg);
2799               best_alg->log[best_alg->ops] = m;
2800               best_alg->op[best_alg->ops] = alg_add_t2_m;
2801             }
2802         }
2803       if (cache_hit)
2804         goto done;
2805
2806     do_alg_sub_t2_m:
2807       q = t + 1;
2808       q = q & -q;
2809       m = exact_log2 (q);
2810       if (m >= 0 && m < maxm)
2811         {
2812           op_cost = shiftsub0_cost (speed, mode, m);
2813           new_limit.cost = best_cost.cost - op_cost;
2814           new_limit.latency = best_cost.latency - op_cost;
2815           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2816
2817           alg_in->cost.cost += op_cost;
2818           alg_in->cost.latency += op_cost;
2819           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2820             {
2821               best_cost = alg_in->cost;
2822               std::swap (alg_in, best_alg);
2823               best_alg->log[best_alg->ops] = m;
2824               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2825             }
2826         }
2827       if (cache_hit)
2828         goto done;
2829     }
2830
2831  done:
2832   /* If best_cost has not decreased, we have not found any algorithm.  */
2833   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2834     {
2835       /* We failed to find an algorithm.  Record alg_impossible for
2836          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2837          we are asked to find an algorithm for T within the same or
2838          lower COST_LIMIT, we can immediately return to the
2839          caller.  */
2840       entry_ptr->t = t;
2841       entry_ptr->mode = mode;
2842       entry_ptr->speed = speed;
2843       entry_ptr->alg = alg_impossible;
2844       entry_ptr->cost = *cost_limit;
2845       return;
2846     }
2847
2848   /* Cache the result.  */
2849   if (!cache_hit)
2850     {
2851       entry_ptr->t = t;
2852       entry_ptr->mode = mode;
2853       entry_ptr->speed = speed;
2854       entry_ptr->alg = best_alg->op[best_alg->ops];
2855       entry_ptr->cost.cost = best_cost.cost;
2856       entry_ptr->cost.latency = best_cost.latency;
2857     }
2858
2859   /* If we are getting a too long sequence for `struct algorithm'
2860      to record, make this search fail.  */
2861   if (best_alg->ops == MAX_BITS_PER_WORD)
2862     return;
2863
2864   /* Copy the algorithm from temporary space to the space at alg_out.
2865      We avoid using structure assignment because the majority of
2866      best_alg is normally undefined, and this is a critical function.  */
2867   alg_out->ops = best_alg->ops + 1;
2868   alg_out->cost = best_cost;
2869   memcpy (alg_out->op, best_alg->op,
2870           alg_out->ops * sizeof *alg_out->op);
2871   memcpy (alg_out->log, best_alg->log,
2872           alg_out->ops * sizeof *alg_out->log);
2873 }
2874 \f
2875 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2876    Try three variations:
2877
2878        - a shift/add sequence based on VAL itself
2879        - a shift/add sequence based on -VAL, followed by a negation
2880        - a shift/add sequence based on VAL - 1, followed by an addition.
2881
2882    Return true if the cheapest of these cost less than MULT_COST,
2883    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2884
2885 static bool
2886 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2887                      struct algorithm *alg, enum mult_variant *variant,
2888                      int mult_cost)
2889 {
2890   struct algorithm alg2;
2891   struct mult_cost limit;
2892   int op_cost;
2893   bool speed = optimize_insn_for_speed_p ();
2894
2895   /* Fail quickly for impossible bounds.  */
2896   if (mult_cost < 0)
2897     return false;
2898
2899   /* Ensure that mult_cost provides a reasonable upper bound.
2900      Any constant multiplication can be performed with less
2901      than 2 * bits additions.  */
2902   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2903   if (mult_cost > op_cost)
2904     mult_cost = op_cost;
2905
2906   *variant = basic_variant;
2907   limit.cost = mult_cost;
2908   limit.latency = mult_cost;
2909   synth_mult (alg, val, &limit, mode);
2910
2911   /* This works only if the inverted value actually fits in an
2912      `unsigned int' */
2913   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2914     {
2915       op_cost = neg_cost (speed, mode);
2916       if (MULT_COST_LESS (&alg->cost, mult_cost))
2917         {
2918           limit.cost = alg->cost.cost - op_cost;
2919           limit.latency = alg->cost.latency - op_cost;
2920         }
2921       else
2922         {
2923           limit.cost = mult_cost - op_cost;
2924           limit.latency = mult_cost - op_cost;
2925         }
2926
2927       synth_mult (&alg2, -val, &limit, mode);
2928       alg2.cost.cost += op_cost;
2929       alg2.cost.latency += op_cost;
2930       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2931         *alg = alg2, *variant = negate_variant;
2932     }
2933
2934   /* This proves very useful for division-by-constant.  */
2935   op_cost = add_cost (speed, mode);
2936   if (MULT_COST_LESS (&alg->cost, mult_cost))
2937     {
2938       limit.cost = alg->cost.cost - op_cost;
2939       limit.latency = alg->cost.latency - op_cost;
2940     }
2941   else
2942     {
2943       limit.cost = mult_cost - op_cost;
2944       limit.latency = mult_cost - op_cost;
2945     }
2946
2947   synth_mult (&alg2, val - 1, &limit, mode);
2948   alg2.cost.cost += op_cost;
2949   alg2.cost.latency += op_cost;
2950   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2951     *alg = alg2, *variant = add_variant;
2952
2953   return MULT_COST_LESS (&alg->cost, mult_cost);
2954 }
2955
2956 /* A subroutine of expand_mult, used for constant multiplications.
2957    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2958    convenient.  Use the shift/add sequence described by ALG and apply
2959    the final fixup specified by VARIANT.  */
2960
2961 static rtx
2962 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
2963                    rtx target, const struct algorithm *alg,
2964                    enum mult_variant variant)
2965 {
2966   HOST_WIDE_INT val_so_far;
2967   rtx_insn *insn;
2968   rtx accum, tem;
2969   int opno;
2970   machine_mode nmode;
2971
2972   /* Avoid referencing memory over and over and invalid sharing
2973      on SUBREGs.  */
2974   op0 = force_reg (mode, op0);
2975
2976   /* ACCUM starts out either as OP0 or as a zero, depending on
2977      the first operation.  */
2978
2979   if (alg->op[0] == alg_zero)
2980     {
2981       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2982       val_so_far = 0;
2983     }
2984   else if (alg->op[0] == alg_m)
2985     {
2986       accum = copy_to_mode_reg (mode, op0);
2987       val_so_far = 1;
2988     }
2989   else
2990     gcc_unreachable ();
2991
2992   for (opno = 1; opno < alg->ops; opno++)
2993     {
2994       int log = alg->log[opno];
2995       rtx shift_subtarget = optimize ? 0 : accum;
2996       rtx add_target
2997         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2998            && !optimize)
2999           ? target : 0;
3000       rtx accum_target = optimize ? 0 : accum;
3001       rtx accum_inner;
3002
3003       switch (alg->op[opno])
3004         {
3005         case alg_shift:
3006           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3007           /* REG_EQUAL note will be attached to the following insn.  */
3008           emit_move_insn (accum, tem);
3009           val_so_far <<= log;
3010           break;
3011
3012         case alg_add_t_m2:
3013           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3014           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3015                                  add_target ? add_target : accum_target);
3016           val_so_far += (HOST_WIDE_INT) 1 << log;
3017           break;
3018
3019         case alg_sub_t_m2:
3020           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3021           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3022                                  add_target ? add_target : accum_target);
3023           val_so_far -= (HOST_WIDE_INT) 1 << log;
3024           break;
3025
3026         case alg_add_t2_m:
3027           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3028                                 log, shift_subtarget, 0);
3029           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3030                                  add_target ? add_target : accum_target);
3031           val_so_far = (val_so_far << log) + 1;
3032           break;
3033
3034         case alg_sub_t2_m:
3035           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3036                                 log, shift_subtarget, 0);
3037           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3038                                  add_target ? add_target : accum_target);
3039           val_so_far = (val_so_far << log) - 1;
3040           break;
3041
3042         case alg_add_factor:
3043           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3044           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3045                                  add_target ? add_target : accum_target);
3046           val_so_far += val_so_far << log;
3047           break;
3048
3049         case alg_sub_factor:
3050           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3051           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3052                                  (add_target
3053                                   ? add_target : (optimize ? 0 : tem)));
3054           val_so_far = (val_so_far << log) - val_so_far;
3055           break;
3056
3057         default:
3058           gcc_unreachable ();
3059         }
3060
3061       if (SCALAR_INT_MODE_P (mode))
3062         {
3063           /* Write a REG_EQUAL note on the last insn so that we can cse
3064              multiplication sequences.  Note that if ACCUM is a SUBREG,
3065              we've set the inner register and must properly indicate that.  */
3066           tem = op0, nmode = mode;
3067           accum_inner = accum;
3068           if (GET_CODE (accum) == SUBREG)
3069             {
3070               accum_inner = SUBREG_REG (accum);
3071               nmode = GET_MODE (accum_inner);
3072               tem = gen_lowpart (nmode, op0);
3073             }
3074
3075           insn = get_last_insn ();
3076           set_dst_reg_note (insn, REG_EQUAL,
3077                             gen_rtx_MULT (nmode, tem,
3078                                           gen_int_mode (val_so_far, nmode)),
3079                             accum_inner);
3080         }
3081     }
3082
3083   if (variant == negate_variant)
3084     {
3085       val_so_far = -val_so_far;
3086       accum = expand_unop (mode, neg_optab, accum, target, 0);
3087     }
3088   else if (variant == add_variant)
3089     {
3090       val_so_far = val_so_far + 1;
3091       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3092     }
3093
3094   /* Compare only the bits of val and val_so_far that are significant
3095      in the result mode, to avoid sign-/zero-extension confusion.  */
3096   nmode = GET_MODE_INNER (mode);
3097   if (nmode == VOIDmode)
3098     nmode = mode;
3099   val &= GET_MODE_MASK (nmode);
3100   val_so_far &= GET_MODE_MASK (nmode);
3101   gcc_assert (val == val_so_far);
3102
3103   return accum;
3104 }
3105
3106 /* Perform a multiplication and return an rtx for the result.
3107    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3108    TARGET is a suggestion for where to store the result (an rtx).
3109
3110    We check specially for a constant integer as OP1.
3111    If you want this check for OP0 as well, then before calling
3112    you should swap the two operands if OP0 would be constant.  */
3113
3114 rtx
3115 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3116              int unsignedp)
3117 {
3118   enum mult_variant variant;
3119   struct algorithm algorithm;
3120   rtx scalar_op1;
3121   int max_cost;
3122   bool speed = optimize_insn_for_speed_p ();
3123   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3124
3125   if (CONSTANT_P (op0))
3126     std::swap (op0, op1);
3127
3128   /* For vectors, there are several simplifications that can be made if
3129      all elements of the vector constant are identical.  */
3130   scalar_op1 = op1;
3131   if (GET_CODE (op1) == CONST_VECTOR)
3132     {
3133       int i, n = CONST_VECTOR_NUNITS (op1);
3134       scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3135       for (i = 1; i < n; ++i)
3136         if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3137           goto skip_scalar;
3138     }
3139
3140   if (INTEGRAL_MODE_P (mode))
3141     {
3142       rtx fake_reg;
3143       HOST_WIDE_INT coeff;
3144       bool is_neg;
3145       int mode_bitsize;
3146
3147       if (op1 == CONST0_RTX (mode))
3148         return op1;
3149       if (op1 == CONST1_RTX (mode))
3150         return op0;
3151       if (op1 == CONSTM1_RTX (mode))
3152         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3153                             op0, target, 0);
3154
3155       if (do_trapv)
3156         goto skip_synth;
3157
3158       /* If mode is integer vector mode, check if the backend supports
3159          vector lshift (by scalar or vector) at all.  If not, we can't use
3160          synthetized multiply.  */
3161       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3162           && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3163           && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3164         goto skip_synth;
3165
3166       /* These are the operations that are potentially turned into
3167          a sequence of shifts and additions.  */
3168       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3169
3170       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3171          less than or equal in size to `unsigned int' this doesn't matter.
3172          If the mode is larger than `unsigned int', then synth_mult works
3173          only if the constant value exactly fits in an `unsigned int' without
3174          any truncation.  This means that multiplying by negative values does
3175          not work; results are off by 2^32 on a 32 bit machine.  */
3176       if (CONST_INT_P (scalar_op1))
3177         {
3178           coeff = INTVAL (scalar_op1);
3179           is_neg = coeff < 0;
3180         }
3181 #if TARGET_SUPPORTS_WIDE_INT
3182       else if (CONST_WIDE_INT_P (scalar_op1))
3183 #else
3184       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3185 #endif
3186         {
3187           int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3188           /* Perfect power of 2 (other than 1, which is handled above).  */
3189           if (shift > 0)
3190             return expand_shift (LSHIFT_EXPR, mode, op0,
3191                                  shift, target, unsignedp);
3192           else
3193             goto skip_synth;
3194         }
3195       else
3196         goto skip_synth;
3197
3198       /* We used to test optimize here, on the grounds that it's better to
3199          produce a smaller program when -O is not used.  But this causes
3200          such a terrible slowdown sometimes that it seems better to always
3201          use synth_mult.  */
3202
3203       /* Special case powers of two.  */
3204       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3205           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3206         return expand_shift (LSHIFT_EXPR, mode, op0,
3207                              floor_log2 (coeff), target, unsignedp);
3208
3209       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3210
3211       /* Attempt to handle multiplication of DImode values by negative
3212          coefficients, by performing the multiplication by a positive
3213          multiplier and then inverting the result.  */
3214       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3215         {
3216           /* Its safe to use -coeff even for INT_MIN, as the
3217              result is interpreted as an unsigned coefficient.
3218              Exclude cost of op0 from max_cost to match the cost
3219              calculation of the synth_mult.  */
3220           coeff = -(unsigned HOST_WIDE_INT) coeff;
3221           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3222                       - neg_cost (speed, mode));
3223           if (max_cost <= 0)
3224             goto skip_synth;
3225
3226           /* Special case powers of two.  */
3227           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3228             {
3229               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3230                                        floor_log2 (coeff), target, unsignedp);
3231               return expand_unop (mode, neg_optab, temp, target, 0);
3232             }
3233
3234           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3235                                    max_cost))
3236             {
3237               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3238                                             &algorithm, variant);
3239               return expand_unop (mode, neg_optab, temp, target, 0);
3240             }
3241           goto skip_synth;
3242         }
3243
3244       /* Exclude cost of op0 from max_cost to match the cost
3245          calculation of the synth_mult.  */
3246       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3247       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3248         return expand_mult_const (mode, op0, coeff, target,
3249                                   &algorithm, variant);
3250     }
3251  skip_synth:
3252
3253   /* Expand x*2.0 as x+x.  */
3254   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3255     {
3256       REAL_VALUE_TYPE d;
3257       REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3258
3259       if (REAL_VALUES_EQUAL (d, dconst2))
3260         {
3261           op0 = force_reg (GET_MODE (op0), op0);
3262           return expand_binop (mode, add_optab, op0, op0,
3263                                target, unsignedp, OPTAB_LIB_WIDEN);
3264         }
3265     }
3266  skip_scalar:
3267
3268   /* This used to use umul_optab if unsigned, but for non-widening multiply
3269      there is no difference between signed and unsigned.  */
3270   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3271                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3272   gcc_assert (op0);
3273   return op0;
3274 }
3275
3276 /* Return a cost estimate for multiplying a register by the given
3277    COEFFicient in the given MODE and SPEED.  */
3278
3279 int
3280 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3281 {
3282   int max_cost;
3283   struct algorithm algorithm;
3284   enum mult_variant variant;
3285
3286   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3287   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3288   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3289     return algorithm.cost.cost;
3290   else
3291     return max_cost;
3292 }
3293
3294 /* Perform a widening multiplication and return an rtx for the result.
3295    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3296    TARGET is a suggestion for where to store the result (an rtx).
3297    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3298    or smul_widen_optab.
3299
3300    We check specially for a constant integer as OP1, comparing the
3301    cost of a widening multiply against the cost of a sequence of shifts
3302    and adds.  */
3303
3304 rtx
3305 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3306                       int unsignedp, optab this_optab)
3307 {
3308   bool speed = optimize_insn_for_speed_p ();
3309   rtx cop1;
3310
3311   if (CONST_INT_P (op1)
3312       && GET_MODE (op0) != VOIDmode
3313       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3314                                 this_optab == umul_widen_optab))
3315       && CONST_INT_P (cop1)
3316       && (INTVAL (cop1) >= 0
3317           || HWI_COMPUTABLE_MODE_P (mode)))
3318     {
3319       HOST_WIDE_INT coeff = INTVAL (cop1);
3320       int max_cost;
3321       enum mult_variant variant;
3322       struct algorithm algorithm;
3323
3324       if (coeff == 0)
3325         return CONST0_RTX (mode);
3326
3327       /* Special case powers of two.  */
3328       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3329         {
3330           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3331           return expand_shift (LSHIFT_EXPR, mode, op0,
3332                                floor_log2 (coeff), target, unsignedp);
3333         }
3334
3335       /* Exclude cost of op0 from max_cost to match the cost
3336          calculation of the synth_mult.  */
3337       max_cost = mul_widen_cost (speed, mode);
3338       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3339                                max_cost))
3340         {
3341           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3342           return expand_mult_const (mode, op0, coeff, target,
3343                                     &algorithm, variant);
3344         }
3345     }
3346   return expand_binop (mode, this_optab, op0, op1, target,
3347                        unsignedp, OPTAB_LIB_WIDEN);
3348 }
3349 \f
3350 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3351    replace division by D, and put the least significant N bits of the result
3352    in *MULTIPLIER_PTR and return the most significant bit.
3353
3354    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3355    needed precision is in PRECISION (should be <= N).
3356
3357    PRECISION should be as small as possible so this function can choose
3358    multiplier more freely.
3359
3360    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3361    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3362
3363    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3364    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3365
3366 unsigned HOST_WIDE_INT
3367 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3368                    unsigned HOST_WIDE_INT *multiplier_ptr,
3369                    int *post_shift_ptr, int *lgup_ptr)
3370 {
3371   int lgup, post_shift;
3372   int pow, pow2;
3373
3374   /* lgup = ceil(log2(divisor)); */
3375   lgup = ceil_log2 (d);
3376
3377   gcc_assert (lgup <= n);
3378
3379   pow = n + lgup;
3380   pow2 = n + lgup - precision;
3381
3382   /* mlow = 2^(N + lgup)/d */
3383   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3384   wide_int mlow = wi::udiv_trunc (val, d);
3385
3386   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3387   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3388   wide_int mhigh = wi::udiv_trunc (val, d);
3389
3390   /* If precision == N, then mlow, mhigh exceed 2^N
3391      (but they do not exceed 2^(N+1)).  */
3392
3393   /* Reduce to lowest terms.  */
3394   for (post_shift = lgup; post_shift > 0; post_shift--)
3395     {
3396       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3397                                                        HOST_BITS_PER_WIDE_INT);
3398       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3399                                                        HOST_BITS_PER_WIDE_INT);
3400       if (ml_lo >= mh_lo)
3401         break;
3402
3403       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3404       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3405     }
3406
3407   *post_shift_ptr = post_shift;
3408   *lgup_ptr = lgup;
3409   if (n < HOST_BITS_PER_WIDE_INT)
3410     {
3411       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3412       *multiplier_ptr = mhigh.to_uhwi () & mask;
3413       return mhigh.to_uhwi () >= mask;
3414     }
3415   else
3416     {
3417       *multiplier_ptr = mhigh.to_uhwi ();
3418       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3419     }
3420 }
3421
3422 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3423    congruent to 1 (mod 2**N).  */
3424
3425 static unsigned HOST_WIDE_INT
3426 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3427 {
3428   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3429
3430   /* The algorithm notes that the choice y = x satisfies
3431      x*y == 1 mod 2^3, since x is assumed odd.
3432      Each iteration doubles the number of bits of significance in y.  */
3433
3434   unsigned HOST_WIDE_INT mask;
3435   unsigned HOST_WIDE_INT y = x;
3436   int nbit = 3;
3437
3438   mask = (n == HOST_BITS_PER_WIDE_INT
3439           ? ~(unsigned HOST_WIDE_INT) 0
3440           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3441
3442   while (nbit < n)
3443     {
3444       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3445       nbit *= 2;
3446     }
3447   return y;
3448 }
3449
3450 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3451    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3452    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3453    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3454    become signed.
3455
3456    The result is put in TARGET if that is convenient.
3457
3458    MODE is the mode of operation.  */
3459
3460 rtx
3461 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3462                              rtx op1, rtx target, int unsignedp)
3463 {
3464   rtx tem;
3465   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3466
3467   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3468                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3469   tem = expand_and (mode, tem, op1, NULL_RTX);
3470   adj_operand
3471     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3472                      adj_operand);
3473
3474   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3475                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3476   tem = expand_and (mode, tem, op0, NULL_RTX);
3477   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3478                           target);
3479
3480   return target;
3481 }
3482
3483 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3484
3485 static rtx
3486 extract_high_half (machine_mode mode, rtx op)
3487 {
3488   machine_mode wider_mode;
3489
3490   if (mode == word_mode)
3491     return gen_highpart (mode, op);
3492
3493   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3494
3495   wider_mode = GET_MODE_WIDER_MODE (mode);
3496   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3497                      GET_MODE_BITSIZE (mode), 0, 1);
3498   return convert_modes (mode, wider_mode, op, 0);
3499 }
3500
3501 /* Like expmed_mult_highpart, but only consider using a multiplication
3502    optab.  OP1 is an rtx for the constant operand.  */
3503
3504 static rtx
3505 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3506                             rtx target, int unsignedp, int max_cost)
3507 {
3508   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3509   machine_mode wider_mode;
3510   optab moptab;
3511   rtx tem;
3512   int size;
3513   bool speed = optimize_insn_for_speed_p ();
3514
3515   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3516
3517   wider_mode = GET_MODE_WIDER_MODE (mode);
3518   size = GET_MODE_BITSIZE (mode);
3519
3520   /* Firstly, try using a multiplication insn that only generates the needed
3521      high part of the product, and in the sign flavor of unsignedp.  */
3522   if (mul_highpart_cost (speed, mode) < max_cost)
3523     {
3524       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3525       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3526                           unsignedp, OPTAB_DIRECT);
3527       if (tem)
3528         return tem;
3529     }
3530
3531   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3532      Need to adjust the result after the multiplication.  */
3533   if (size - 1 < BITS_PER_WORD
3534       && (mul_highpart_cost (speed, mode)
3535           + 2 * shift_cost (speed, mode, size-1)
3536           + 4 * add_cost (speed, mode) < max_cost))
3537     {
3538       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3539       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3540                           unsignedp, OPTAB_DIRECT);
3541       if (tem)
3542         /* We used the wrong signedness.  Adjust the result.  */
3543         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3544                                             tem, unsignedp);
3545     }
3546
3547   /* Try widening multiplication.  */
3548   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3549   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3550       && mul_widen_cost (speed, wider_mode) < max_cost)
3551     {
3552       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3553                           unsignedp, OPTAB_WIDEN);
3554       if (tem)
3555         return extract_high_half (mode, tem);
3556     }
3557
3558   /* Try widening the mode and perform a non-widening multiplication.  */
3559   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3560       && size - 1 < BITS_PER_WORD
3561       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3562           < max_cost))
3563     {
3564       rtx_insn *insns;
3565       rtx wop0, wop1;
3566
3567       /* We need to widen the operands, for example to ensure the
3568          constant multiplier is correctly sign or zero extended.
3569          Use a sequence to clean-up any instructions emitted by
3570          the conversions if things don't work out.  */
3571       start_sequence ();
3572       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3573       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3574       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3575                           unsignedp, OPTAB_WIDEN);
3576       insns = get_insns ();
3577       end_sequence ();
3578
3579       if (tem)
3580         {
3581           emit_insn (insns);
3582           return extract_high_half (mode, tem);
3583         }
3584     }
3585
3586   /* Try widening multiplication of opposite signedness, and adjust.  */
3587   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3588   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3589       && size - 1 < BITS_PER_WORD
3590       && (mul_widen_cost (speed, wider_mode)
3591           + 2 * shift_cost (speed, mode, size-1)
3592           + 4 * add_cost (speed, mode) < max_cost))
3593     {
3594       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3595                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3596       if (tem != 0)
3597         {
3598           tem = extract_high_half (mode, tem);
3599           /* We used the wrong signedness.  Adjust the result.  */
3600           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3601                                               target, unsignedp);
3602         }
3603     }
3604
3605   return 0;
3606 }
3607
3608 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3609    putting the high half of the result in TARGET if that is convenient,
3610    and return where the result is.  If the operation can not be performed,
3611    0 is returned.
3612
3613    MODE is the mode of operation and result.
3614
3615    UNSIGNEDP nonzero means unsigned multiply.
3616
3617    MAX_COST is the total allowed cost for the expanded RTL.  */
3618
3619 static rtx
3620 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3621                       rtx target, int unsignedp, int max_cost)
3622 {
3623   machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3624   unsigned HOST_WIDE_INT cnst1;
3625   int extra_cost;
3626   bool sign_adjust = false;
3627   enum mult_variant variant;
3628   struct algorithm alg;
3629   rtx tem;
3630   bool speed = optimize_insn_for_speed_p ();
3631
3632   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3633   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3634   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3635
3636   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3637
3638   /* We can't optimize modes wider than BITS_PER_WORD.
3639      ??? We might be able to perform double-word arithmetic if
3640      mode == word_mode, however all the cost calculations in
3641      synth_mult etc. assume single-word operations.  */
3642   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3643     return expmed_mult_highpart_optab (mode, op0, op1, target,
3644                                        unsignedp, max_cost);
3645
3646   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3647
3648   /* Check whether we try to multiply by a negative constant.  */
3649   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3650     {
3651       sign_adjust = true;
3652       extra_cost += add_cost (speed, mode);
3653     }
3654
3655   /* See whether shift/add multiplication is cheap enough.  */
3656   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3657                            max_cost - extra_cost))
3658     {
3659       /* See whether the specialized multiplication optabs are
3660          cheaper than the shift/add version.  */
3661       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3662                                         alg.cost.cost + extra_cost);
3663       if (tem)
3664         return tem;
3665
3666       tem = convert_to_mode (wider_mode, op0, unsignedp);
3667       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3668       tem = extract_high_half (mode, tem);
3669
3670       /* Adjust result for signedness.  */
3671       if (sign_adjust)
3672         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3673
3674       return tem;
3675     }
3676   return expmed_mult_highpart_optab (mode, op0, op1, target,
3677                                      unsignedp, max_cost);
3678 }
3679
3680
3681 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3682
3683 static rtx
3684 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3685 {
3686   rtx result, temp, shift;
3687   rtx_code_label *label;
3688   int logd;
3689   int prec = GET_MODE_PRECISION (mode);
3690
3691   logd = floor_log2 (d);
3692   result = gen_reg_rtx (mode);
3693
3694   /* Avoid conditional branches when they're expensive.  */
3695   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3696       && optimize_insn_for_speed_p ())
3697     {
3698       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3699                                       mode, 0, -1);
3700       if (signmask)
3701         {
3702           HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3703           signmask = force_reg (mode, signmask);
3704           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3705
3706           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3707              which instruction sequence to use.  If logical right shifts
3708              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3709              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3710
3711           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3712           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3713               || (set_src_cost (temp, optimize_insn_for_speed_p ())
3714                   > COSTS_N_INSNS (2)))
3715             {
3716               temp = expand_binop (mode, xor_optab, op0, signmask,
3717                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3718               temp = expand_binop (mode, sub_optab, temp, signmask,
3719                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3720               temp = expand_binop (mode, and_optab, temp,
3721                                    gen_int_mode (masklow, mode),
3722                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3723               temp = expand_binop (mode, xor_optab, temp, signmask,
3724                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3725               temp = expand_binop (mode, sub_optab, temp, signmask,
3726                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3727             }
3728           else
3729             {
3730               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3731                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3732               signmask = force_reg (mode, signmask);
3733
3734               temp = expand_binop (mode, add_optab, op0, signmask,
3735                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3736               temp = expand_binop (mode, and_optab, temp,
3737                                    gen_int_mode (masklow, mode),
3738                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3739               temp = expand_binop (mode, sub_optab, temp, signmask,
3740                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3741             }
3742           return temp;
3743         }
3744     }
3745
3746   /* Mask contains the mode's signbit and the significant bits of the
3747      modulus.  By including the signbit in the operation, many targets
3748      can avoid an explicit compare operation in the following comparison
3749      against zero.  */
3750   wide_int mask = wi::mask (logd, false, prec);
3751   mask = wi::set_bit (mask, prec - 1);
3752
3753   temp = expand_binop (mode, and_optab, op0,
3754                        immed_wide_int_const (mask, mode),
3755                        result, 1, OPTAB_LIB_WIDEN);
3756   if (temp != result)
3757     emit_move_insn (result, temp);
3758
3759   label = gen_label_rtx ();
3760   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3761
3762   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3763                        0, OPTAB_LIB_WIDEN);
3764
3765   mask = wi::mask (logd, true, prec);
3766   temp = expand_binop (mode, ior_optab, temp,
3767                        immed_wide_int_const (mask, mode),
3768                        result, 1, OPTAB_LIB_WIDEN);
3769   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3770                        0, OPTAB_LIB_WIDEN);
3771   if (temp != result)
3772     emit_move_insn (result, temp);
3773   emit_label (label);
3774   return result;
3775 }
3776
3777 /* Expand signed division of OP0 by a power of two D in mode MODE.
3778    This routine is only called for positive values of D.  */
3779
3780 static rtx
3781 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3782 {
3783   rtx temp;
3784   rtx_code_label *label;
3785   int logd;
3786
3787   logd = floor_log2 (d);
3788
3789   if (d == 2
3790       && BRANCH_COST (optimize_insn_for_speed_p (),
3791                       false) >= 1)
3792     {
3793       temp = gen_reg_rtx (mode);
3794       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3795       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3796                            0, OPTAB_LIB_WIDEN);
3797       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3798     }
3799
3800 #ifdef HAVE_conditional_move
3801   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3802       >= 2)
3803     {
3804       rtx temp2;
3805
3806       start_sequence ();
3807       temp2 = copy_to_mode_reg (mode, op0);
3808       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3809                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3810       temp = force_reg (mode, temp);
3811
3812       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3813       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3814                                      mode, temp, temp2, mode, 0);
3815       if (temp2)
3816         {
3817           rtx_insn *seq = get_insns ();
3818           end_sequence ();
3819           emit_insn (seq);
3820           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3821         }
3822       end_sequence ();
3823     }
3824 #endif
3825
3826   if (BRANCH_COST (optimize_insn_for_speed_p (),
3827                    false) >= 2)
3828     {
3829       int ushift = GET_MODE_BITSIZE (mode) - logd;
3830
3831       temp = gen_reg_rtx (mode);
3832       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3833       if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3834           || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3835              > COSTS_N_INSNS (1))
3836         temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3837                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3838       else
3839         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3840                              ushift, NULL_RTX, 1);
3841       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3842                            0, OPTAB_LIB_WIDEN);
3843       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3844     }
3845
3846   label = gen_label_rtx ();
3847   temp = copy_to_mode_reg (mode, op0);
3848   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3849   expand_inc (temp, gen_int_mode (d - 1, mode));
3850   emit_label (label);
3851   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3852 }
3853 \f
3854 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3855    if that is convenient, and returning where the result is.
3856    You may request either the quotient or the remainder as the result;
3857    specify REM_FLAG nonzero to get the remainder.
3858
3859    CODE is the expression code for which kind of division this is;
3860    it controls how rounding is done.  MODE is the machine mode to use.
3861    UNSIGNEDP nonzero means do unsigned division.  */
3862
3863 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3864    and then correct it by or'ing in missing high bits
3865    if result of ANDI is nonzero.
3866    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3867    This could optimize to a bfexts instruction.
3868    But C doesn't use these operations, so their optimizations are
3869    left for later.  */
3870 /* ??? For modulo, we don't actually need the highpart of the first product,
3871    the low part will do nicely.  And for small divisors, the second multiply
3872    can also be a low-part only multiply or even be completely left out.
3873    E.g. to calculate the remainder of a division by 3 with a 32 bit
3874    multiply, multiply with 0x55555556 and extract the upper two bits;
3875    the result is exact for inputs up to 0x1fffffff.
3876    The input range can be reduced by using cross-sum rules.
3877    For odd divisors >= 3, the following table gives right shift counts
3878    so that if a number is shifted by an integer multiple of the given
3879    amount, the remainder stays the same:
3880    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3881    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3882    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3883    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3884    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3885
3886    Cross-sum rules for even numbers can be derived by leaving as many bits
3887    to the right alone as the divisor has zeros to the right.
3888    E.g. if x is an unsigned 32 bit number:
3889    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3890    */
3891
3892 rtx
3893 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3894                rtx op0, rtx op1, rtx target, int unsignedp)
3895 {
3896   machine_mode compute_mode;
3897   rtx tquotient;
3898   rtx quotient = 0, remainder = 0;
3899   rtx_insn *last;
3900   int size;
3901   rtx_insn *insn;
3902   optab optab1, optab2;
3903   int op1_is_constant, op1_is_pow2 = 0;
3904   int max_cost, extra_cost;
3905   static HOST_WIDE_INT last_div_const = 0;
3906   bool speed = optimize_insn_for_speed_p ();
3907
3908   op1_is_constant = CONST_INT_P (op1);
3909   if (op1_is_constant)
3910     {
3911       unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3912       if (unsignedp)
3913         ext_op1 &= GET_MODE_MASK (mode);
3914       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3915                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3916     }
3917
3918   /*
3919      This is the structure of expand_divmod:
3920
3921      First comes code to fix up the operands so we can perform the operations
3922      correctly and efficiently.
3923
3924      Second comes a switch statement with code specific for each rounding mode.
3925      For some special operands this code emits all RTL for the desired
3926      operation, for other cases, it generates only a quotient and stores it in
3927      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3928      to indicate that it has not done anything.
3929
3930      Last comes code that finishes the operation.  If QUOTIENT is set and
3931      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3932      QUOTIENT is not set, it is computed using trunc rounding.
3933
3934      We try to generate special code for division and remainder when OP1 is a
3935      constant.  If |OP1| = 2**n we can use shifts and some other fast
3936      operations.  For other values of OP1, we compute a carefully selected
3937      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3938      by m.
3939
3940      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3941      half of the product.  Different strategies for generating the product are
3942      implemented in expmed_mult_highpart.
3943
3944      If what we actually want is the remainder, we generate that by another
3945      by-constant multiplication and a subtraction.  */
3946
3947   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3948      code below will malfunction if we are, so check here and handle
3949      the special case if so.  */
3950   if (op1 == const1_rtx)
3951     return rem_flag ? const0_rtx : op0;
3952
3953   /* When dividing by -1, the negation can overflow.  Under -ftrapv, integer
3954      modes use negv_optab for it; otherwise plain neg_optab is used.  */
3955   if (! unsignedp && op1 == constm1_rtx)
3956     {
3957       if (rem_flag)
3958         return const0_rtx;
3959       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3960                           ? negv_optab : neg_optab, op0, target, 0);
3961     }
3962
3963   if (target
3964       /* Don't use the function value register as a target
3965          since we have to read it as well as write it,
3966          and function-inlining gets confused by this.  */
3967       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3968           /* Don't clobber an operand while doing a multi-step calculation.  */
3969           || ((rem_flag || op1_is_constant)
3970               && (reg_mentioned_p (target, op0)
3971                   || (MEM_P (op0) && MEM_P (target))))
3972           || reg_mentioned_p (target, op1)
3973           || (MEM_P (op1) && MEM_P (target))))
3974     target = 0;
3975
3976   /* Get the mode in which to perform this computation.  Normally it will
3977      be MODE, but sometimes we can't do the desired operation in MODE.
3978      If so, pick a wider mode in which we can do the operation.  Convert
3979      to that mode at the start to avoid repeated conversions.
3980
3981      First see what operations we need.  These depend on the expression
3982      we are evaluating.  (We assume that divxx3 insns exist under the
3983      same conditions that modxx3 insns and that these insns don't normally
3984      fail.  If these assumptions are not correct, we may generate less
3985      efficient code in some cases.)
3986
3987      Then see if we find a mode in which we can open-code that operation
3988      (either a division, modulus, or shift).  Finally, check for the smallest
3989      mode for which we can do the operation with a library call.  */
3990
3991   /* We might want to refine this now that we have division-by-constant
3992      optimization.  Since expmed_mult_highpart tries so many variants, it is
3993      not straightforward to generalize this.  Maybe we should make an array
3994      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3995
3996   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3997             ? (unsignedp ? lshr_optab : ashr_optab)
3998             : (unsignedp ? udiv_optab : sdiv_optab));
3999   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
4000             ? optab1
4001             : (unsignedp ? udivmod_optab : sdivmod_optab));
4002
4003   for (compute_mode = mode; compute_mode != VOIDmode;
4004        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4005     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4006         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4007       break;
4008
4009   if (compute_mode == VOIDmode)
4010     for (compute_mode = mode; compute_mode != VOIDmode;
4011          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4012       if (optab_libfunc (optab1, compute_mode)
4013           || optab_libfunc (optab2, compute_mode))
4014         break;
4015
4016   /* If we still couldn't find a mode, use MODE, but expand_binop will
4017      probably die.  */
4018   if (compute_mode == VOIDmode)
4019     compute_mode = mode;
4020
4021   if (target && GET_MODE (target) == compute_mode)
4022     tquotient = target;
4023   else
4024     tquotient = gen_reg_rtx (compute_mode);
4025
4026   size = GET_MODE_BITSIZE (compute_mode);
4027 #if 0
4028   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4029      (mode), and thereby get better code when OP1 is a constant.  Do that
4030      later.  It will require going over all usages of SIZE below.  */
4031   size = GET_MODE_BITSIZE (mode);
4032 #endif
4033
4034   /* For a REM, deduct the multiply and add costs unless the last divide
4035      done was for this same constant.  Then record the constant of this
4036      divide, or 0 if this is a REM or OP1 is not constant.  */
4037   max_cost = (unsignedp
4038               ? udiv_cost (speed, compute_mode)
4039               : sdiv_cost (speed, compute_mode));
4040   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4041                      && INTVAL (op1) == last_div_const))
4042     max_cost -= (mul_cost (speed, compute_mode)
4043                  + add_cost (speed, compute_mode));
4044
4045   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4046
4047   /* Now convert to the best mode to use.  */
4048   if (compute_mode != mode)
4049     {
4050       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4051       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4052
4053       /* convert_modes may have placed op1 into a register, so we
4054          must recompute the following.  */
4055       op1_is_constant = CONST_INT_P (op1);
4056       op1_is_pow2 = (op1_is_constant
4057                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4058                           || (! unsignedp
4059                               && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4060     }
4061
4062   /* If one of the operands is a volatile MEM, copy it into a register.  */
4063
4064   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4065     op0 = force_reg (compute_mode, op0);
4066   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4067     op1 = force_reg (compute_mode, op1);
4068
4069   /* If we need the remainder or if OP1 is constant, we need to
4070      put OP0 in a register in case it has any queued subexpressions.  */
4071   if (rem_flag || op1_is_constant)
4072     op0 = force_reg (compute_mode, op0);
4073
4074   last = get_last_insn ();
4075
4076   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4077   if (unsignedp)
4078     {
4079       if (code == FLOOR_DIV_EXPR)
4080         code = TRUNC_DIV_EXPR;
4081       if (code == FLOOR_MOD_EXPR)
4082         code = TRUNC_MOD_EXPR;
4083       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4084         code = TRUNC_DIV_EXPR;
4085     }
4086
4087   if (op1 != const0_rtx)
4088     switch (code)
4089       {
4090       case TRUNC_MOD_EXPR:
4091       case TRUNC_DIV_EXPR:
4092         if (op1_is_constant)
4093           {
4094             if (unsignedp)
4095               {
4096                 unsigned HOST_WIDE_INT mh, ml;
4097                 int pre_shift, post_shift;
4098                 int dummy;
4099                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4100                                             & GET_MODE_MASK (compute_mode));
4101
4102                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4103                   {
4104                     pre_shift = floor_log2 (d);
4105                     if (rem_flag)
4106                       {
4107                         unsigned HOST_WIDE_INT mask
4108                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4109                         remainder
4110                           = expand_binop (compute_mode, and_optab, op0,
4111                                           gen_int_mode (mask, compute_mode),
4112                                           remainder, 1,
4113                                           OPTAB_LIB_WIDEN);
4114                         if (remainder)
4115                           return gen_lowpart (mode, remainder);
4116                       }
4117                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4118                                              pre_shift, tquotient, 1);
4119                   }
4120                 else if (size <= HOST_BITS_PER_WIDE_INT)
4121                   {
4122                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4123                       {
4124                         /* Most significant bit of divisor is set; emit an scc
4125                            insn.  */
4126                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4127                                                           compute_mode, 1, 1);
4128                       }
4129                     else
4130                       {
4131                         /* Find a suitable multiplier and right shift count
4132                            instead of multiplying with D.  */
4133
4134                         mh = choose_multiplier (d, size, size,
4135                                                 &ml, &post_shift, &dummy);
4136
4137                         /* If the suggested multiplier is more than SIZE bits,
4138                            we can do better for even divisors, using an
4139                            initial right shift.  */
4140                         if (mh != 0 && (d & 1) == 0)
4141                           {
4142                             pre_shift = floor_log2 (d & -d);
4143                             mh = choose_multiplier (d >> pre_shift, size,
4144                                                     size - pre_shift,
4145                                                     &ml, &post_shift, &dummy);
4146                             gcc_assert (!mh);
4147                           }
4148                         else
4149                           pre_shift = 0;
4150
4151                         if (mh != 0)
4152                           {
4153                             rtx t1, t2, t3, t4;
4154
4155                             if (post_shift - 1 >= BITS_PER_WORD)
4156                               goto fail1;
4157
4158                             extra_cost
4159                               = (shift_cost (speed, compute_mode, post_shift - 1)
4160                                  + shift_cost (speed, compute_mode, 1)
4161                                  + 2 * add_cost (speed, compute_mode));
4162                             t1 = expmed_mult_highpart
4163                               (compute_mode, op0,
4164                                gen_int_mode (ml, compute_mode),
4165                                NULL_RTX, 1, max_cost - extra_cost);
4166                             if (t1 == 0)
4167                               goto fail1;
4168                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4169                                                                op0, t1),
4170                                                 NULL_RTX);
4171                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4172                                                t2, 1, NULL_RTX, 1);
4173                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4174                                                               t1, t3),
4175                                                 NULL_RTX);
4176                             quotient = expand_shift
4177                               (RSHIFT_EXPR, compute_mode, t4,
4178                                post_shift - 1, tquotient, 1);
4179                           }
4180                         else
4181                           {
4182                             rtx t1, t2;
4183
4184                             if (pre_shift >= BITS_PER_WORD
4185                                 || post_shift >= BITS_PER_WORD)
4186                               goto fail1;
4187
4188                             t1 = expand_shift
4189                               (RSHIFT_EXPR, compute_mode, op0,
4190                                pre_shift, NULL_RTX, 1);
4191                             extra_cost
4192                               = (shift_cost (speed, compute_mode, pre_shift)
4193                                  + shift_cost (speed, compute_mode, post_shift));
4194                             t2 = expmed_mult_highpart
4195                               (compute_mode, t1,
4196                                gen_int_mode (ml, compute_mode),
4197                                NULL_RTX, 1, max_cost - extra_cost);
4198                             if (t2 == 0)
4199                               goto fail1;
4200                             quotient = expand_shift
4201                               (RSHIFT_EXPR, compute_mode, t2,
4202                                post_shift, tquotient, 1);
4203                           }
4204                       }
4205                   }
4206                 else            /* Too wide mode to use tricky code */
4207                   break;
4208
4209                 insn = get_last_insn ();
4210                 if (insn != last)
4211                   set_dst_reg_note (insn, REG_EQUAL,
4212                                     gen_rtx_UDIV (compute_mode, op0, op1),
4213                                     quotient);
4214               }
4215             else                /* TRUNC_DIV, signed */
4216               {
4217                 unsigned HOST_WIDE_INT ml;
4218                 int lgup, post_shift;
4219                 rtx mlr;
4220                 HOST_WIDE_INT d = INTVAL (op1);
4221                 unsigned HOST_WIDE_INT abs_d;
4222
4223                 /* Since d might be INT_MIN, we have to cast to
4224                    unsigned HOST_WIDE_INT before negating to avoid
4225                    undefined signed overflow.  */
4226                 abs_d = (d >= 0
4227                          ? (unsigned HOST_WIDE_INT) d
4228                          : - (unsigned HOST_WIDE_INT) d);
4229
4230                 /* n rem d = n rem -d */
4231                 if (rem_flag && d < 0)
4232                   {
4233                     d = abs_d;
4234                     op1 = gen_int_mode (abs_d, compute_mode);
4235                   }
4236
4237                 if (d == 1)
4238                   quotient = op0;
4239                 else if (d == -1)
4240                   quotient = expand_unop (compute_mode, neg_optab, op0,
4241                                           tquotient, 0);
4242                 else if (HOST_BITS_PER_WIDE_INT >= size
4243                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4244                   {
4245                     /* This case is not handled correctly below.  */
4246                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4247                                                 compute_mode, 1, 1);
4248                     if (quotient == 0)
4249                       goto fail1;
4250                   }
4251                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4252                          && (rem_flag
4253                              ? smod_pow2_cheap (speed, compute_mode)
4254                              : sdiv_pow2_cheap (speed, compute_mode))
4255                          /* We assume that cheap metric is true if the
4256                             optab has an expander for this mode.  */
4257                          && ((optab_handler ((rem_flag ? smod_optab
4258                                               : sdiv_optab),
4259                                              compute_mode)
4260                               != CODE_FOR_nothing)
4261                              || (optab_handler (sdivmod_optab,
4262                                                 compute_mode)
4263                                  != CODE_FOR_nothing)))
4264                   ;
4265                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4266                   {
4267                     if (rem_flag)
4268                       {
4269                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4270                         if (remainder)
4271                           return gen_lowpart (mode, remainder);
4272                       }
4273
4274                     if (sdiv_pow2_cheap (speed, compute_mode)
4275                         && ((optab_handler (sdiv_optab, compute_mode)
4276                              != CODE_FOR_nothing)
4277                             || (optab_handler (sdivmod_optab, compute_mode)
4278                                 != CODE_FOR_nothing)))
4279                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4280                                                 compute_mode, op0,
4281                                                 gen_int_mode (abs_d,
4282                                                               compute_mode),
4283                                                 NULL_RTX, 0);
4284                     else
4285                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4286
4287                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4288                        negate the quotient.  */
4289                     if (d < 0)
4290                       {
4291                         insn = get_last_insn ();
4292                         if (insn != last
4293                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4294                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4295                           set_dst_reg_note (insn, REG_EQUAL,
4296                                             gen_rtx_DIV (compute_mode, op0,
4297                                                          gen_int_mode
4298                                                            (abs_d,
4299                                                             compute_mode)),
4300                                             quotient);
4301
4302                         quotient = expand_unop (compute_mode, neg_optab,
4303                                                 quotient, quotient, 0);
4304                       }
4305                   }
4306                 else if (size <= HOST_BITS_PER_WIDE_INT)
4307                   {
4308                     choose_multiplier (abs_d, size, size - 1,
4309                                        &ml, &post_shift, &lgup);
4310                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4311                       {
4312                         rtx t1, t2, t3;
4313
4314                         if (post_shift >= BITS_PER_WORD
4315                             || size - 1 >= BITS_PER_WORD)
4316                           goto fail1;
4317
4318                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4319                                       + shift_cost (speed, compute_mode, size - 1)
4320                                       + add_cost (speed, compute_mode));
4321                         t1 = expmed_mult_highpart
4322                           (compute_mode, op0, gen_int_mode (ml, compute_mode),
4323                            NULL_RTX, 0, max_cost - extra_cost);
4324                         if (t1 == 0)
4325                           goto fail1;
4326                         t2 = expand_shift
4327                           (RSHIFT_EXPR, compute_mode, t1,
4328                            post_shift, NULL_RTX, 0);
4329                         t3 = expand_shift
4330                           (RSHIFT_EXPR, compute_mode, op0,
4331                            size - 1, NULL_RTX, 0);
4332                         if (d < 0)
4333                           quotient
4334                             = force_operand (gen_rtx_MINUS (compute_mode,
4335                                                             t3, t2),
4336                                              tquotient);
4337                         else
4338                           quotient
4339                             = force_operand (gen_rtx_MINUS (compute_mode,
4340                                                             t2, t3),
4341                                              tquotient);
4342                       }
4343                     else
4344                       {
4345                         rtx t1, t2, t3, t4;
4346
4347                         if (post_shift >= BITS_PER_WORD
4348                             || size - 1 >= BITS_PER_WORD)
4349                           goto fail1;
4350
4351                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4352                         mlr = gen_int_mode (ml, compute_mode);
4353                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4354                                       + shift_cost (speed, compute_mode, size - 1)
4355                                       + 2 * add_cost (speed, compute_mode));
4356                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4357                                                    NULL_RTX, 0,
4358                                                    max_cost - extra_cost);
4359                         if (t1 == 0)
4360                           goto fail1;
4361                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4362                                                           t1, op0),
4363                                             NULL_RTX);
4364                         t3 = expand_shift
4365                           (RSHIFT_EXPR, compute_mode, t2,
4366                            post_shift, NULL_RTX, 0);
4367                         t4 = expand_shift
4368                           (RSHIFT_EXPR, compute_mode, op0,
4369                            size - 1, NULL_RTX, 0);
4370                         if (d < 0)
4371                           quotient
4372                             = force_operand (gen_rtx_MINUS (compute_mode,
4373                                                             t4, t3),
4374                                              tquotient);
4375                         else
4376                           quotient
4377                             = force_operand (gen_rtx_MINUS (compute_mode,
4378                                                             t3, t4),
4379                                              tquotient);
4380                       }
4381                   }
4382                 else            /* Too wide mode to use tricky code */
4383                   break;
4384
4385                 insn = get_last_insn ();
4386                 if (insn != last)
4387                   set_dst_reg_note (insn, REG_EQUAL,
4388                                     gen_rtx_DIV (compute_mode, op0, op1),
4389                                     quotient);
4390               }
4391             break;
4392           }
4393       fail1:
4394         delete_insns_since (last);
4395         break;
4396
4397       case FLOOR_DIV_EXPR:
4398       case FLOOR_MOD_EXPR:
4399       /* We will come here only for signed operations.  */
4400         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4401           {
4402             unsigned HOST_WIDE_INT mh, ml;
4403             int pre_shift, lgup, post_shift;
4404             HOST_WIDE_INT d = INTVAL (op1);
4405
4406             if (d > 0)
4407               {
4408                 /* We could just as easily deal with negative constants here,
4409                    but it does not seem worth the trouble for GCC 2.6.  */
4410                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4411                   {
4412                     pre_shift = floor_log2 (d);
4413                     if (rem_flag)
4414                       {
4415                         unsigned HOST_WIDE_INT mask
4416                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4417                         remainder = expand_binop
4418                           (compute_mode, and_optab, op0,
4419                            gen_int_mode (mask, compute_mode),
4420                            remainder, 0, OPTAB_LIB_WIDEN);
4421                         if (remainder)
4422                           return gen_lowpart (mode, remainder);
4423                       }
4424                     quotient = expand_shift
4425                       (RSHIFT_EXPR, compute_mode, op0,
4426                        pre_shift, tquotient, 0);
4427                   }
4428                 else
4429                   {
4430                     rtx t1, t2, t3, t4;
4431
4432                     mh = choose_multiplier (d, size, size - 1,
4433                                             &ml, &post_shift, &lgup);
4434                     gcc_assert (!mh);
4435
4436                     if (post_shift < BITS_PER_WORD
4437                         && size - 1 < BITS_PER_WORD)
4438                       {
4439                         t1 = expand_shift
4440                           (RSHIFT_EXPR, compute_mode, op0,
4441                            size - 1, NULL_RTX, 0);
4442                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4443                                            NULL_RTX, 0, OPTAB_WIDEN);
4444                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4445                                       + shift_cost (speed, compute_mode, size - 1)
4446                                       + 2 * add_cost (speed, compute_mode));
4447                         t3 = expmed_mult_highpart
4448                           (compute_mode, t2, gen_int_mode (ml, compute_mode),
4449                            NULL_RTX, 1, max_cost - extra_cost);
4450                         if (t3 != 0)
4451                           {
4452                             t4 = expand_shift
4453                               (RSHIFT_EXPR, compute_mode, t3,
4454                                post_shift, NULL_RTX, 1);
4455                             quotient = expand_binop (compute_mode, xor_optab,
4456                                                      t4, t1, tquotient, 0,
4457                                                      OPTAB_WIDEN);
4458                           }
4459                       }
4460                   }
4461               }
4462             else
4463               {
4464                 rtx nsign, t1, t2, t3, t4;
4465                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4466                                                   op0, constm1_rtx), NULL_RTX);
4467                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4468                                    0, OPTAB_WIDEN);
4469                 nsign = expand_shift
4470                   (RSHIFT_EXPR, compute_mode, t2,
4471                    size - 1, NULL_RTX, 0);
4472                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4473                                     NULL_RTX);
4474                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4475                                     NULL_RTX, 0);
4476                 if (t4)
4477                   {
4478                     rtx t5;
4479                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4480                                       NULL_RTX, 0);
4481                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4482                                                             t4, t5),
4483                                               tquotient);
4484                   }
4485               }
4486           }
4487
4488         if (quotient != 0)
4489           break;
4490         delete_insns_since (last);
4491
4492         /* Try using an instruction that produces both the quotient and
4493            remainder, using truncation.  We can easily compensate the quotient
4494            or remainder to get floor rounding, once we have the remainder.
4495            Notice that we compute also the final remainder value here,
4496            and return the result right away.  */
4497         if (target == 0 || GET_MODE (target) != compute_mode)
4498           target = gen_reg_rtx (compute_mode);
4499
4500         if (rem_flag)
4501           {
4502             remainder
4503               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4504             quotient = gen_reg_rtx (compute_mode);
4505           }
4506         else
4507           {
4508             quotient
4509               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4510             remainder = gen_reg_rtx (compute_mode);
4511           }
4512
4513         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4514                                  quotient, remainder, 0))
4515           {
4516             /* This could be computed with a branch-less sequence.
4517                Save that for later.  */
4518             rtx tem;
4519             rtx_code_label *label = gen_label_rtx ();
4520             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4521             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4522                                 NULL_RTX, 0, OPTAB_WIDEN);
4523             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4524             expand_dec (quotient, const1_rtx);
4525             expand_inc (remainder, op1);
4526             emit_label (label);
4527             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4528           }
4529
4530         /* No luck with division elimination or divmod.  Have to do it
4531            by conditionally adjusting op0 *and* the result.  */
4532         {
4533           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4534           rtx adjusted_op0;
4535           rtx tem;
4536
4537           quotient = gen_reg_rtx (compute_mode);
4538           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4539           label1 = gen_label_rtx ();
4540           label2 = gen_label_rtx ();
4541           label3 = gen_label_rtx ();
4542           label4 = gen_label_rtx ();
4543           label5 = gen_label_rtx ();
4544           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4545           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4546           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4547                               quotient, 0, OPTAB_LIB_WIDEN);
4548           if (tem != quotient)
4549             emit_move_insn (quotient, tem);
4550           emit_jump_insn (gen_jump (label5));
4551           emit_barrier ();
4552           emit_label (label1);
4553           expand_inc (adjusted_op0, const1_rtx);
4554           emit_jump_insn (gen_jump (label4));
4555           emit_barrier ();
4556           emit_label (label2);
4557           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4558           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4559                               quotient, 0, OPTAB_LIB_WIDEN);
4560           if (tem != quotient)
4561             emit_move_insn (quotient, tem);
4562           emit_jump_insn (gen_jump (label5));
4563           emit_barrier ();
4564           emit_label (label3);
4565           expand_dec (adjusted_op0, const1_rtx);
4566           emit_label (label4);
4567           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4568                               quotient, 0, OPTAB_LIB_WIDEN);
4569           if (tem != quotient)
4570             emit_move_insn (quotient, tem);
4571           expand_dec (quotient, const1_rtx);
4572           emit_label (label5);
4573         }
4574         break;
4575
4576       case CEIL_DIV_EXPR:
4577       case CEIL_MOD_EXPR:
4578         if (unsignedp)
4579           {
4580             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4581               {
4582                 rtx t1, t2, t3;
4583                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4584                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4585                                    floor_log2 (d), tquotient, 1);
4586                 t2 = expand_binop (compute_mode, and_optab, op0,
4587                                    gen_int_mode (d - 1, compute_mode),
4588                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4589                 t3 = gen_reg_rtx (compute_mode);
4590                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4591                                       compute_mode, 1, 1);
4592                 if (t3 == 0)
4593                   {
4594                     rtx_code_label *lab;
4595                     lab = gen_label_rtx ();
4596                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4597                     expand_inc (t1, const1_rtx);
4598                     emit_label (lab);
4599                     quotient = t1;
4600                   }
4601                 else
4602                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4603                                                           t1, t3),
4604                                             tquotient);
4605                 break;
4606               }
4607
4608             /* Try using an instruction that produces both the quotient and
4609                remainder, using truncation.  We can easily compensate the
4610                quotient or remainder to get ceiling rounding, once we have the
4611                remainder.  Notice that we compute also the final remainder
4612                value here, and return the result right away.  */
4613             if (target == 0 || GET_MODE (target) != compute_mode)
4614               target = gen_reg_rtx (compute_mode);
4615
4616             if (rem_flag)
4617               {
4618                 remainder = (REG_P (target)
4619                              ? target : gen_reg_rtx (compute_mode));
4620                 quotient = gen_reg_rtx (compute_mode);
4621               }
4622             else
4623               {
4624                 quotient = (REG_P (target)
4625                             ? target : gen_reg_rtx (compute_mode));
4626                 remainder = gen_reg_rtx (compute_mode);
4627               }
4628
4629             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4630                                      remainder, 1))
4631               {
4632                 /* This could be computed with a branch-less sequence.
4633                    Save that for later.  */
4634                 rtx_code_label *label = gen_label_rtx ();
4635                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4636                                  compute_mode, label);
4637                 expand_inc (quotient, const1_rtx);
4638                 expand_dec (remainder, op1);
4639                 emit_label (label);
4640                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4641               }
4642
4643             /* No luck with division elimination or divmod.  Have to do it
4644                by conditionally adjusting op0 *and* the result.  */
4645             {
4646               rtx_code_label *label1, *label2;
4647               rtx adjusted_op0, tem;
4648
4649               quotient = gen_reg_rtx (compute_mode);
4650               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4651               label1 = gen_label_rtx ();
4652               label2 = gen_label_rtx ();
4653               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4654                                compute_mode, label1);
4655               emit_move_insn  (quotient, const0_rtx);
4656               emit_jump_insn (gen_jump (label2));
4657               emit_barrier ();
4658               emit_label (label1);
4659               expand_dec (adjusted_op0, const1_rtx);
4660               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4661                                   quotient, 1, OPTAB_LIB_WIDEN);
4662               if (tem != quotient)
4663                 emit_move_insn (quotient, tem);
4664               expand_inc (quotient, const1_rtx);
4665               emit_label (label2);
4666             }
4667           }
4668         else /* signed */
4669           {
4670             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4671                 && INTVAL (op1) >= 0)
4672               {
4673                 /* This is extremely similar to the code for the unsigned case
4674                    above.  For 2.7 we should merge these variants, but for
4675                    2.6.1 I don't want to touch the code for unsigned since that
4676                    get used in C.  The signed case will only be used by other
4677                    languages (Ada).  */
4678
4679                 rtx t1, t2, t3;
4680                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4681                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4682                                    floor_log2 (d), tquotient, 0);
4683                 t2 = expand_binop (compute_mode, and_optab, op0,
4684                                    gen_int_mode (d - 1, compute_mode),
4685                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4686                 t3 = gen_reg_rtx (compute_mode);
4687                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4688                                       compute_mode, 1, 1);
4689                 if (t3 == 0)
4690                   {
4691                     rtx_code_label *lab;
4692                     lab = gen_label_rtx ();
4693                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4694                     expand_inc (t1, const1_rtx);
4695                     emit_label (lab);
4696                     quotient = t1;
4697                   }
4698                 else
4699                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4700                                                           t1, t3),
4701                                             tquotient);
4702                 break;
4703               }
4704
4705             /* Try using an instruction that produces both the quotient and
4706                remainder, using truncation.  We can easily compensate the
4707                quotient or remainder to get ceiling rounding, once we have the
4708                remainder.  Notice that we compute also the final remainder
4709                value here, and return the result right away.  */
4710             if (target == 0 || GET_MODE (target) != compute_mode)
4711               target = gen_reg_rtx (compute_mode);
4712             if (rem_flag)
4713               {
4714                 remainder= (REG_P (target)
4715                             ? target : gen_reg_rtx (compute_mode));
4716                 quotient = gen_reg_rtx (compute_mode);
4717               }
4718             else
4719               {
4720                 quotient = (REG_P (target)
4721                             ? target : gen_reg_rtx (compute_mode));
4722                 remainder = gen_reg_rtx (compute_mode);
4723               }
4724
4725             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4726                                      remainder, 0))
4727               {
4728                 /* This could be computed with a branch-less sequence.
4729                    Save that for later.  */
4730                 rtx tem;
4731                 rtx_code_label *label = gen_label_rtx ();
4732                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4733                                  compute_mode, label);
4734                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4735                                     NULL_RTX, 0, OPTAB_WIDEN);
4736                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4737                 expand_inc (quotient, const1_rtx);
4738                 expand_dec (remainder, op1);
4739                 emit_label (label);
4740                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4741               }
4742
4743             /* No luck with division elimination or divmod.  Have to do it
4744                by conditionally adjusting op0 *and* the result.  */
4745             {
4746               rtx_code_label *label1, *label2, *label3, *label4, *label5;
4747               rtx adjusted_op0;
4748               rtx tem;
4749
4750               quotient = gen_reg_rtx (compute_mode);
4751               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4752               label1 = gen_label_rtx ();
4753               label2 = gen_label_rtx ();
4754               label3 = gen_label_rtx ();
4755               label4 = gen_label_rtx ();
4756               label5 = gen_label_rtx ();
4757               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4758               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4759                                compute_mode, label1);
4760               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4761                                   quotient, 0, OPTAB_LIB_WIDEN);
4762               if (tem != quotient)
4763                 emit_move_insn (quotient, tem);
4764               emit_jump_insn (gen_jump (label5));
4765               emit_barrier ();
4766               emit_label (label1);
4767               expand_dec (adjusted_op0, const1_rtx);
4768               emit_jump_insn (gen_jump (label4));
4769               emit_barrier ();
4770               emit_label (label2);
4771               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4772                                compute_mode, label3);
4773               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4774                                   quotient, 0, OPTAB_LIB_WIDEN);
4775               if (tem != quotient)
4776                 emit_move_insn (quotient, tem);
4777               emit_jump_insn (gen_jump (label5));
4778               emit_barrier ();
4779               emit_label (label3);
4780               expand_inc (adjusted_op0, const1_rtx);
4781               emit_label (label4);
4782               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4783                                   quotient, 0, OPTAB_LIB_WIDEN);
4784               if (tem != quotient)
4785                 emit_move_insn (quotient, tem);
4786               expand_inc (quotient, const1_rtx);
4787               emit_label (label5);
4788             }
4789           }
4790         break;
4791
4792       case EXACT_DIV_EXPR:
4793         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4794           {
4795             HOST_WIDE_INT d = INTVAL (op1);
4796             unsigned HOST_WIDE_INT ml;
4797             int pre_shift;
4798             rtx t1;
4799
4800             pre_shift = floor_log2 (d & -d);
4801             ml = invert_mod2n (d >> pre_shift, size);
4802             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4803                                pre_shift, NULL_RTX, unsignedp);
4804             quotient = expand_mult (compute_mode, t1,
4805                                     gen_int_mode (ml, compute_mode),
4806                                     NULL_RTX, 1);
4807
4808             insn = get_last_insn ();
4809             set_dst_reg_note (insn, REG_EQUAL,
4810                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4811                                               compute_mode, op0, op1),
4812                               quotient);
4813           }
4814         break;
4815
4816       case ROUND_DIV_EXPR:
4817       case ROUND_MOD_EXPR:
4818         if (unsignedp)
4819           {
4820             rtx tem;
4821             rtx_code_label *label;
4822             label = gen_label_rtx ();
4823             quotient = gen_reg_rtx (compute_mode);
4824             remainder = gen_reg_rtx (compute_mode);
4825             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4826               {
4827                 rtx tem;
4828                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4829                                          quotient, 1, OPTAB_LIB_WIDEN);
4830                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4831                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4832                                           remainder, 1, OPTAB_LIB_WIDEN);
4833               }
4834             tem = plus_constant (compute_mode, op1, -1);
4835             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4836             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4837             expand_inc (quotient, const1_rtx);
4838             expand_dec (remainder, op1);
4839             emit_label (label);
4840           }
4841         else
4842           {
4843             rtx abs_rem, abs_op1, tem, mask;
4844             rtx_code_label *label;
4845             label = gen_label_rtx ();
4846             quotient = gen_reg_rtx (compute_mode);
4847             remainder = gen_reg_rtx (compute_mode);
4848             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4849               {
4850                 rtx tem;
4851                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4852                                          quotient, 0, OPTAB_LIB_WIDEN);
4853                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4854                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4855                                           remainder, 0, OPTAB_LIB_WIDEN);
4856               }
4857             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4858             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4859             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4860                                 1, NULL_RTX, 1);
4861             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4862             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4863                                 NULL_RTX, 0, OPTAB_WIDEN);
4864             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4865                                  size - 1, NULL_RTX, 0);
4866             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4867                                 NULL_RTX, 0, OPTAB_WIDEN);
4868             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4869                                 NULL_RTX, 0, OPTAB_WIDEN);
4870             expand_inc (quotient, tem);
4871             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4872                                 NULL_RTX, 0, OPTAB_WIDEN);
4873             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4874                                 NULL_RTX, 0, OPTAB_WIDEN);
4875             expand_dec (remainder, tem);
4876             emit_label (label);
4877           }
4878         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4879
4880       default:
4881         gcc_unreachable ();
4882       }
4883
4884   if (quotient == 0)
4885     {
4886       if (target && GET_MODE (target) != compute_mode)
4887         target = 0;
4888
4889       if (rem_flag)
4890         {
4891           /* Try to produce the remainder without producing the quotient.
4892              If we seem to have a divmod pattern that does not require widening,
4893              don't try widening here.  We should really have a WIDEN argument
4894              to expand_twoval_binop, since what we'd really like to do here is
4895              1) try a mod insn in compute_mode
4896              2) try a divmod insn in compute_mode
4897              3) try a div insn in compute_mode and multiply-subtract to get
4898                 remainder
4899              4) try the same things with widening allowed.  */
4900           remainder
4901             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4902                                  op0, op1, target,
4903                                  unsignedp,
4904                                  ((optab_handler (optab2, compute_mode)
4905                                    != CODE_FOR_nothing)
4906                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4907           if (remainder == 0)
4908             {
4909               /* No luck there.  Can we do remainder and divide at once
4910                  without a library call?  */
4911               remainder = gen_reg_rtx (compute_mode);
4912               if (! expand_twoval_binop ((unsignedp
4913                                           ? udivmod_optab
4914                                           : sdivmod_optab),
4915                                          op0, op1,
4916                                          NULL_RTX, remainder, unsignedp))
4917                 remainder = 0;
4918             }
4919
4920           if (remainder)
4921             return gen_lowpart (mode, remainder);
4922         }
4923
4924       /* Produce the quotient.  Try a quotient insn, but not a library call.
4925          If we have a divmod in this mode, use it in preference to widening
4926          the div (for this test we assume it will not fail). Note that optab2
4927          is set to the one of the two optabs that the call below will use.  */
4928       quotient
4929         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4930                              op0, op1, rem_flag ? NULL_RTX : target,
4931                              unsignedp,
4932                              ((optab_handler (optab2, compute_mode)
4933                                != CODE_FOR_nothing)
4934                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4935
4936       if (quotient == 0)
4937         {
4938           /* No luck there.  Try a quotient-and-remainder insn,
4939              keeping the quotient alone.  */
4940           quotient = gen_reg_rtx (compute_mode);
4941           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4942                                      op0, op1,
4943                                      quotient, NULL_RTX, unsignedp))
4944             {
4945               quotient = 0;
4946               if (! rem_flag)
4947                 /* Still no luck.  If we are not computing the remainder,
4948                    use a library call for the quotient.  */
4949                 quotient = sign_expand_binop (compute_mode,
4950                                               udiv_optab, sdiv_optab,
4951                                               op0, op1, target,
4952                                               unsignedp, OPTAB_LIB_WIDEN);
4953             }
4954         }
4955     }
4956
4957   if (rem_flag)
4958     {
4959       if (target && GET_MODE (target) != compute_mode)
4960         target = 0;
4961
4962       if (quotient == 0)
4963         {
4964           /* No divide instruction either.  Use library for remainder.  */
4965           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4966                                          op0, op1, target,
4967                                          unsignedp, OPTAB_LIB_WIDEN);
4968           /* No remainder function.  Try a quotient-and-remainder
4969              function, keeping the remainder.  */
4970           if (!remainder)
4971             {
4972               remainder = gen_reg_rtx (compute_mode);
4973               if (!expand_twoval_binop_libfunc
4974                   (unsignedp ? udivmod_optab : sdivmod_optab,
4975                    op0, op1,
4976                    NULL_RTX, remainder,
4977                    unsignedp ? UMOD : MOD))
4978                 remainder = NULL_RTX;
4979             }
4980         }
4981       else
4982         {
4983           /* We divided.  Now finish doing X - Y * (X / Y).  */
4984           remainder = expand_mult (compute_mode, quotient, op1,
4985                                    NULL_RTX, unsignedp);
4986           remainder = expand_binop (compute_mode, sub_optab, op0,
4987                                     remainder, target, unsignedp,
4988                                     OPTAB_LIB_WIDEN);
4989         }
4990     }
4991
4992   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4993 }
4994 \f
4995 /* Return a tree node with data type TYPE, describing the value of X.
4996    Usually this is an VAR_DECL, if there is no obvious better choice.
4997    X may be an expression, however we only support those expressions
4998    generated by loop.c.  */
4999
5000 tree
5001 make_tree (tree type, rtx x)
5002 {
5003   tree t;
5004
5005   switch (GET_CODE (x))
5006     {
5007     case CONST_INT:
5008     case CONST_WIDE_INT:
5009       t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
5010       return t;
5011
5012     case CONST_DOUBLE:
5013       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5014       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5015         t = wide_int_to_tree (type,
5016                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5017                                                     HOST_BITS_PER_WIDE_INT * 2));
5018       else
5019         {
5020           REAL_VALUE_TYPE d;
5021
5022           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5023           t = build_real (type, d);
5024         }
5025
5026       return t;
5027
5028     case CONST_VECTOR:
5029       {
5030         int units = CONST_VECTOR_NUNITS (x);
5031         tree itype = TREE_TYPE (type);
5032         tree *elts;
5033         int i;
5034
5035         /* Build a tree with vector elements.  */
5036         elts = XALLOCAVEC (tree, units);
5037         for (i = units - 1; i >= 0; --i)
5038           {
5039             rtx elt = CONST_VECTOR_ELT (x, i);
5040             elts[i] = make_tree (itype, elt);
5041           }
5042
5043         return build_vector (type, elts);
5044       }
5045
5046     case PLUS:
5047       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5048                           make_tree (type, XEXP (x, 1)));
5049
5050     case MINUS:
5051       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5052                           make_tree (type, XEXP (x, 1)));
5053
5054     case NEG:
5055       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5056
5057     case MULT:
5058       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5059                           make_tree (type, XEXP (x, 1)));
5060
5061     case ASHIFT:
5062       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5063                           make_tree (type, XEXP (x, 1)));
5064
5065     case LSHIFTRT:
5066       t = unsigned_type_for (type);
5067       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5068                                          make_tree (t, XEXP (x, 0)),
5069                                          make_tree (type, XEXP (x, 1))));
5070
5071     case ASHIFTRT:
5072       t = signed_type_for (type);
5073       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5074                                          make_tree (t, XEXP (x, 0)),
5075                                          make_tree (type, XEXP (x, 1))));
5076
5077     case DIV:
5078       if (TREE_CODE (type) != REAL_TYPE)
5079         t = signed_type_for (type);
5080       else
5081         t = type;
5082
5083       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5084                                          make_tree (t, XEXP (x, 0)),
5085                                          make_tree (t, XEXP (x, 1))));
5086     case UDIV:
5087       t = unsigned_type_for (type);
5088       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5089                                          make_tree (t, XEXP (x, 0)),
5090                                          make_tree (t, XEXP (x, 1))));
5091
5092     case SIGN_EXTEND:
5093     case ZERO_EXTEND:
5094       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5095                                           GET_CODE (x) == ZERO_EXTEND);
5096       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5097
5098     case CONST:
5099       return make_tree (type, XEXP (x, 0));
5100
5101     case SYMBOL_REF:
5102       t = SYMBOL_REF_DECL (x);
5103       if (t)
5104         return fold_convert (type, build_fold_addr_expr (t));
5105       /* else fall through.  */
5106
5107     default:
5108       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5109
5110       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5111          address mode to pointer mode.  */
5112       if (POINTER_TYPE_P (type))
5113         x = convert_memory_address_addr_space
5114               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5115
5116       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5117          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5118       t->decl_with_rtl.rtl = x;
5119
5120       return t;
5121     }
5122 }
5123 \f
5124 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5125    and returning TARGET.
5126
5127    If TARGET is 0, a pseudo-register or constant is returned.  */
5128
5129 rtx
5130 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5131 {
5132   rtx tem = 0;
5133
5134   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5135     tem = simplify_binary_operation (AND, mode, op0, op1);
5136   if (tem == 0)
5137     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5138
5139   if (target == 0)
5140     target = tem;
5141   else if (tem != target)
5142     emit_move_insn (target, tem);
5143   return target;
5144 }
5145
5146 /* Helper function for emit_store_flag.  */
5147 rtx
5148 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5149              machine_mode mode, machine_mode compare_mode,
5150              int unsignedp, rtx x, rtx y, int normalizep,
5151              machine_mode target_mode)
5152 {
5153   struct expand_operand ops[4];
5154   rtx op0, comparison, subtarget;
5155   rtx_insn *last;
5156   machine_mode result_mode = targetm.cstore_mode (icode);
5157
5158   last = get_last_insn ();
5159   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5160   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5161   if (!x || !y)
5162     {
5163       delete_insns_since (last);
5164       return NULL_RTX;
5165     }
5166
5167   if (target_mode == VOIDmode)
5168     target_mode = result_mode;
5169   if (!target)
5170     target = gen_reg_rtx (target_mode);
5171
5172   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5173
5174   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5175   create_fixed_operand (&ops[1], comparison);
5176   create_fixed_operand (&ops[2], x);
5177   create_fixed_operand (&ops[3], y);
5178   if (!maybe_expand_insn (icode, 4, ops))
5179     {
5180       delete_insns_since (last);
5181       return NULL_RTX;
5182     }
5183   subtarget = ops[0].value;
5184
5185   /* If we are converting to a wider mode, first convert to
5186      TARGET_MODE, then normalize.  This produces better combining
5187      opportunities on machines that have a SIGN_EXTRACT when we are
5188      testing a single bit.  This mostly benefits the 68k.
5189
5190      If STORE_FLAG_VALUE does not have the sign bit set when
5191      interpreted in MODE, we can do this conversion as unsigned, which
5192      is usually more efficient.  */
5193   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5194     {
5195       convert_move (target, subtarget,
5196                     val_signbit_known_clear_p (result_mode,
5197                                                STORE_FLAG_VALUE));
5198       op0 = target;
5199       result_mode = target_mode;
5200     }
5201   else
5202     op0 = subtarget;
5203
5204   /* If we want to keep subexpressions around, don't reuse our last
5205      target.  */
5206   if (optimize)
5207     subtarget = 0;
5208
5209   /* Now normalize to the proper value in MODE.  Sometimes we don't
5210      have to do anything.  */
5211   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5212     ;
5213   /* STORE_FLAG_VALUE might be the most negative number, so write
5214      the comparison this way to avoid a compiler-time warning.  */
5215   else if (- normalizep == STORE_FLAG_VALUE)
5216     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5217
5218   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5219      it hard to use a value of just the sign bit due to ANSI integer
5220      constant typing rules.  */
5221   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5222     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5223                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5224                         normalizep == 1);
5225   else
5226     {
5227       gcc_assert (STORE_FLAG_VALUE & 1);
5228
5229       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5230       if (normalizep == -1)
5231         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5232     }
5233
5234   /* If we were converting to a smaller mode, do the conversion now.  */
5235   if (target_mode != result_mode)
5236     {
5237       convert_move (target, op0, 0);
5238       return target;
5239     }
5240   else
5241     return op0;
5242 }
5243
5244
5245 /* A subroutine of emit_store_flag only including "tricks" that do not
5246    need a recursive call.  These are kept separate to avoid infinite
5247    loops.  */
5248
5249 static rtx
5250 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5251                    machine_mode mode, int unsignedp, int normalizep,
5252                    machine_mode target_mode)
5253 {
5254   rtx subtarget;
5255   enum insn_code icode;
5256   machine_mode compare_mode;
5257   enum mode_class mclass;
5258   enum rtx_code scode;
5259   rtx tem;
5260
5261   if (unsignedp)
5262     code = unsigned_condition (code);
5263   scode = swap_condition (code);
5264
5265   /* If one operand is constant, make it the second one.  Only do this
5266      if the other operand is not constant as well.  */
5267
5268   if (swap_commutative_operands_p (op0, op1))
5269     {
5270       tem = op0;
5271       op0 = op1;
5272       op1 = tem;
5273       code = swap_condition (code);
5274     }
5275
5276   if (mode == VOIDmode)
5277     mode = GET_MODE (op0);
5278
5279   /* For some comparisons with 1 and -1, we can convert this to
5280      comparisons with zero.  This will often produce more opportunities for
5281      store-flag insns.  */
5282
5283   switch (code)
5284     {
5285     case LT:
5286       if (op1 == const1_rtx)
5287         op1 = const0_rtx, code = LE;
5288       break;
5289     case LE:
5290       if (op1 == constm1_rtx)
5291         op1 = const0_rtx, code = LT;
5292       break;
5293     case GE:
5294       if (op1 == const1_rtx)
5295         op1 = const0_rtx, code = GT;
5296       break;
5297     case GT:
5298       if (op1 == constm1_rtx)
5299         op1 = const0_rtx, code = GE;
5300       break;
5301     case GEU:
5302       if (op1 == const1_rtx)
5303         op1 = const0_rtx, code = NE;
5304       break;
5305     case LTU:
5306       if (op1 == const1_rtx)
5307         op1 = const0_rtx, code = EQ;
5308       break;
5309     default:
5310       break;
5311     }
5312
5313   /* If we are comparing a double-word integer with zero or -1, we can
5314      convert the comparison into one involving a single word.  */
5315   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5316       && GET_MODE_CLASS (mode) == MODE_INT
5317       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5318     {
5319       if ((code == EQ || code == NE)
5320           && (op1 == const0_rtx || op1 == constm1_rtx))
5321         {
5322           rtx op00, op01;
5323
5324           /* Do a logical OR or AND of the two words and compare the
5325              result.  */
5326           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5327           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5328           tem = expand_binop (word_mode,
5329                               op1 == const0_rtx ? ior_optab : and_optab,
5330                               op00, op01, NULL_RTX, unsignedp,
5331                               OPTAB_DIRECT);
5332
5333           if (tem != 0)
5334             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5335                                    unsignedp, normalizep);
5336         }
5337       else if ((code == LT || code == GE) && op1 == const0_rtx)
5338         {
5339           rtx op0h;
5340
5341           /* If testing the sign bit, can just test on high word.  */
5342           op0h = simplify_gen_subreg (word_mode, op0, mode,
5343                                       subreg_highpart_offset (word_mode,
5344                                                               mode));
5345           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5346                                  unsignedp, normalizep);
5347         }
5348       else
5349         tem = NULL_RTX;
5350
5351       if (tem)
5352         {
5353           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5354             return tem;
5355           if (!target)
5356             target = gen_reg_rtx (target_mode);
5357
5358           convert_move (target, tem,
5359                         !val_signbit_known_set_p (word_mode,
5360                                                   (normalizep ? normalizep
5361                                                    : STORE_FLAG_VALUE)));
5362           return target;
5363         }
5364     }
5365
5366   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5367      complement of A (for GE) and shifting the sign bit to the low bit.  */
5368   if (op1 == const0_rtx && (code == LT || code == GE)
5369       && GET_MODE_CLASS (mode) == MODE_INT
5370       && (normalizep || STORE_FLAG_VALUE == 1
5371           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5372     {
5373       subtarget = target;
5374
5375       if (!target)
5376         target_mode = mode;
5377
5378       /* If the result is to be wider than OP0, it is best to convert it
5379          first.  If it is to be narrower, it is *incorrect* to convert it
5380          first.  */
5381       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5382         {
5383           op0 = convert_modes (target_mode, mode, op0, 0);
5384           mode = target_mode;
5385         }
5386
5387       if (target_mode != mode)
5388         subtarget = 0;
5389
5390       if (code == GE)
5391         op0 = expand_unop (mode, one_cmpl_optab, op0,
5392                            ((STORE_FLAG_VALUE == 1 || normalizep)
5393                             ? 0 : subtarget), 0);
5394
5395       if (STORE_FLAG_VALUE == 1 || normalizep)
5396         /* If we are supposed to produce a 0/1 value, we want to do
5397            a logical shift from the sign bit to the low-order bit; for
5398            a -1/0 value, we do an arithmetic shift.  */
5399         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5400                             GET_MODE_BITSIZE (mode) - 1,
5401                             subtarget, normalizep != -1);
5402
5403       if (mode != target_mode)
5404         op0 = convert_modes (target_mode, mode, op0, 0);
5405
5406       return op0;
5407     }
5408
5409   mclass = GET_MODE_CLASS (mode);
5410   for (compare_mode = mode; compare_mode != VOIDmode;
5411        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5412     {
5413      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5414      icode = optab_handler (cstore_optab, optab_mode);
5415      if (icode != CODE_FOR_nothing)
5416         {
5417           do_pending_stack_adjust ();
5418           tem = emit_cstore (target, icode, code, mode, compare_mode,
5419                              unsignedp, op0, op1, normalizep, target_mode);
5420           if (tem)
5421             return tem;
5422
5423           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5424             {
5425               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5426                                  unsignedp, op1, op0, normalizep, target_mode);
5427               if (tem)
5428                 return tem;
5429             }
5430           break;
5431         }
5432     }
5433
5434   return 0;
5435 }
5436
5437 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5438    and storing in TARGET.  Normally return TARGET.
5439    Return 0 if that cannot be done.
5440
5441    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5442    it is VOIDmode, they cannot both be CONST_INT.
5443
5444    UNSIGNEDP is for the case where we have to widen the operands
5445    to perform the operation.  It says to use zero-extension.
5446
5447    NORMALIZEP is 1 if we should convert the result to be either zero
5448    or one.  Normalize is -1 if we should convert the result to be
5449    either zero or -1.  If NORMALIZEP is zero, the result will be left
5450    "raw" out of the scc insn.  */
5451
5452 rtx
5453 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5454                  machine_mode mode, int unsignedp, int normalizep)
5455 {
5456   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5457   enum rtx_code rcode;
5458   rtx subtarget;
5459   rtx tem, trueval;
5460   rtx_insn *last;
5461
5462   /* If we compare constants, we shouldn't use a store-flag operation,
5463      but a constant load.  We can get there via the vanilla route that
5464      usually generates a compare-branch sequence, but will in this case
5465      fold the comparison to a constant, and thus elide the branch.  */
5466   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5467     return NULL_RTX;
5468
5469   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5470                            target_mode);
5471   if (tem)
5472     return tem;
5473
5474   /* If we reached here, we can't do this with a scc insn, however there
5475      are some comparisons that can be done in other ways.  Don't do any
5476      of these cases if branches are very cheap.  */
5477   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5478     return 0;
5479
5480   /* See what we need to return.  We can only return a 1, -1, or the
5481      sign bit.  */
5482
5483   if (normalizep == 0)
5484     {
5485       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5486         normalizep = STORE_FLAG_VALUE;
5487
5488       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5489         ;
5490       else
5491         return 0;
5492     }
5493
5494   last = get_last_insn ();
5495
5496   /* If optimizing, use different pseudo registers for each insn, instead
5497      of reusing the same pseudo.  This leads to better CSE, but slows
5498      down the compiler, since there are more pseudos */
5499   subtarget = (!optimize
5500                && (target_mode == mode)) ? target : NULL_RTX;
5501   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5502
5503   /* For floating-point comparisons, try the reverse comparison or try
5504      changing the "orderedness" of the comparison.  */
5505   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5506     {
5507       enum rtx_code first_code;
5508       bool and_them;
5509
5510       rcode = reverse_condition_maybe_unordered (code);
5511       if (can_compare_p (rcode, mode, ccp_store_flag)
5512           && (code == ORDERED || code == UNORDERED
5513               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5514               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5515         {
5516           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5517                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5518
5519           /* For the reverse comparison, use either an addition or a XOR.  */
5520           if (want_add
5521               && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5522                            optimize_insn_for_speed_p ()) == 0)
5523             {
5524               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5525                                        STORE_FLAG_VALUE, target_mode);
5526               if (tem)
5527                 return expand_binop (target_mode, add_optab, tem,
5528                                      gen_int_mode (normalizep, target_mode),
5529                                      target, 0, OPTAB_WIDEN);
5530             }
5531           else if (!want_add
5532                    && rtx_cost (trueval, XOR, 1,
5533                                 optimize_insn_for_speed_p ()) == 0)
5534             {
5535               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5536                                        normalizep, target_mode);
5537               if (tem)
5538                 return expand_binop (target_mode, xor_optab, tem, trueval,
5539                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5540             }
5541         }
5542
5543       delete_insns_since (last);
5544
5545       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5546       if (code == ORDERED || code == UNORDERED)
5547         return 0;
5548
5549       and_them = split_comparison (code, mode, &first_code, &code);
5550
5551       /* If there are no NaNs, the first comparison should always fall through.
5552          Effectively change the comparison to the other one.  */
5553       if (!HONOR_NANS (mode))
5554         {
5555           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5556           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5557                                     target_mode);
5558         }
5559
5560 #ifdef HAVE_conditional_move
5561       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5562          conditional move.  */
5563       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5564                                normalizep, target_mode);
5565       if (tem == 0)
5566         return 0;
5567
5568       if (and_them)
5569         tem = emit_conditional_move (target, code, op0, op1, mode,
5570                                      tem, const0_rtx, GET_MODE (tem), 0);
5571       else
5572         tem = emit_conditional_move (target, code, op0, op1, mode,
5573                                      trueval, tem, GET_MODE (tem), 0);
5574
5575       if (tem == 0)
5576         delete_insns_since (last);
5577       return tem;
5578 #else
5579       return 0;
5580 #endif
5581     }
5582
5583   /* The remaining tricks only apply to integer comparisons.  */
5584
5585   if (GET_MODE_CLASS (mode) != MODE_INT)
5586     return 0;
5587
5588   /* If this is an equality comparison of integers, we can try to exclusive-or
5589      (or subtract) the two operands and use a recursive call to try the
5590      comparison with zero.  Don't do any of these cases if branches are
5591      very cheap.  */
5592
5593   if ((code == EQ || code == NE) && op1 != const0_rtx)
5594     {
5595       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5596                           OPTAB_WIDEN);
5597
5598       if (tem == 0)
5599         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5600                             OPTAB_WIDEN);
5601       if (tem != 0)
5602         tem = emit_store_flag (target, code, tem, const0_rtx,
5603                                mode, unsignedp, normalizep);
5604       if (tem != 0)
5605         return tem;
5606
5607       delete_insns_since (last);
5608     }
5609
5610   /* For integer comparisons, try the reverse comparison.  However, for
5611      small X and if we'd have anyway to extend, implementing "X != 0"
5612      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5613   rcode = reverse_condition (code);
5614   if (can_compare_p (rcode, mode, ccp_store_flag)
5615       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5616             && code == NE
5617             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5618             && op1 == const0_rtx))
5619     {
5620       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5621                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5622
5623       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5624       if (want_add
5625           && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5626                        optimize_insn_for_speed_p ()) == 0)
5627         {
5628           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5629                                    STORE_FLAG_VALUE, target_mode);
5630           if (tem != 0)
5631             tem = expand_binop (target_mode, add_optab, tem,
5632                                 gen_int_mode (normalizep, target_mode),
5633                                 target, 0, OPTAB_WIDEN);
5634         }
5635       else if (!want_add
5636                && rtx_cost (trueval, XOR, 1,
5637                             optimize_insn_for_speed_p ()) == 0)
5638         {
5639           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5640                                    normalizep, target_mode);
5641           if (tem != 0)
5642             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5643                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5644         }
5645
5646       if (tem != 0)
5647         return tem;
5648       delete_insns_since (last);
5649     }
5650
5651   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5652      the constant zero.  Reject all other comparisons at this point.  Only
5653      do LE and GT if branches are expensive since they are expensive on
5654      2-operand machines.  */
5655
5656   if (op1 != const0_rtx
5657       || (code != EQ && code != NE
5658           && (BRANCH_COST (optimize_insn_for_speed_p (),
5659                            false) <= 1 || (code != LE && code != GT))))
5660     return 0;
5661
5662   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5663      do the necessary operation below.  */
5664
5665   tem = 0;
5666
5667   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5668      the sign bit set.  */
5669
5670   if (code == LE)
5671     {
5672       /* This is destructive, so SUBTARGET can't be OP0.  */
5673       if (rtx_equal_p (subtarget, op0))
5674         subtarget = 0;
5675
5676       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5677                           OPTAB_WIDEN);
5678       if (tem)
5679         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5680                             OPTAB_WIDEN);
5681     }
5682
5683   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5684      number of bits in the mode of OP0, minus one.  */
5685
5686   if (code == GT)
5687     {
5688       if (rtx_equal_p (subtarget, op0))
5689         subtarget = 0;
5690
5691       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5692                           GET_MODE_BITSIZE (mode) - 1,
5693                           subtarget, 0);
5694       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5695                           OPTAB_WIDEN);
5696     }
5697
5698   if (code == EQ || code == NE)
5699     {
5700       /* For EQ or NE, one way to do the comparison is to apply an operation
5701          that converts the operand into a positive number if it is nonzero
5702          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5703          for NE we negate.  This puts the result in the sign bit.  Then we
5704          normalize with a shift, if needed.
5705
5706          Two operations that can do the above actions are ABS and FFS, so try
5707          them.  If that doesn't work, and MODE is smaller than a full word,
5708          we can use zero-extension to the wider mode (an unsigned conversion)
5709          as the operation.  */
5710
5711       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5712          that is compensated by the subsequent overflow when subtracting
5713          one / negating.  */
5714
5715       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5716         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5717       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5718         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5719       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5720         {
5721           tem = convert_modes (word_mode, mode, op0, 1);
5722           mode = word_mode;
5723         }
5724
5725       if (tem != 0)
5726         {
5727           if (code == EQ)
5728             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5729                                 0, OPTAB_WIDEN);
5730           else
5731             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5732         }
5733
5734       /* If we couldn't do it that way, for NE we can "or" the two's complement
5735          of the value with itself.  For EQ, we take the one's complement of
5736          that "or", which is an extra insn, so we only handle EQ if branches
5737          are expensive.  */
5738
5739       if (tem == 0
5740           && (code == NE
5741               || BRANCH_COST (optimize_insn_for_speed_p (),
5742                               false) > 1))
5743         {
5744           if (rtx_equal_p (subtarget, op0))
5745             subtarget = 0;
5746
5747           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5748           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5749                               OPTAB_WIDEN);
5750
5751           if (tem && code == EQ)
5752             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5753         }
5754     }
5755
5756   if (tem && normalizep)
5757     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5758                         GET_MODE_BITSIZE (mode) - 1,
5759                         subtarget, normalizep == 1);
5760
5761   if (tem)
5762     {
5763       if (!target)
5764         ;
5765       else if (GET_MODE (tem) != target_mode)
5766         {
5767           convert_move (target, tem, 0);
5768           tem = target;
5769         }
5770       else if (!subtarget)
5771         {
5772           emit_move_insn (target, tem);
5773           tem = target;
5774         }
5775     }
5776   else
5777     delete_insns_since (last);
5778
5779   return tem;
5780 }
5781
5782 /* Like emit_store_flag, but always succeeds.  */
5783
5784 rtx
5785 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5786                        machine_mode mode, int unsignedp, int normalizep)
5787 {
5788   rtx tem;
5789   rtx_code_label *label;
5790   rtx trueval, falseval;
5791
5792   /* First see if emit_store_flag can do the job.  */
5793   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5794   if (tem != 0)
5795     return tem;
5796
5797   if (!target)
5798     target = gen_reg_rtx (word_mode);
5799
5800   /* If this failed, we have to do this with set/compare/jump/set code.
5801      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5802   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5803   if (code == NE
5804       && GET_MODE_CLASS (mode) == MODE_INT
5805       && REG_P (target)
5806       && op0 == target
5807       && op1 == const0_rtx)
5808     {
5809       label = gen_label_rtx ();
5810       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5811                                mode, NULL_RTX, NULL_RTX, label, -1);
5812       emit_move_insn (target, trueval);
5813       emit_label (label);
5814       return target;
5815     }
5816
5817   if (!REG_P (target)
5818       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5819     target = gen_reg_rtx (GET_MODE (target));
5820
5821   /* Jump in the right direction if the target cannot implement CODE
5822      but can jump on its reverse condition.  */
5823   falseval = const0_rtx;
5824   if (! can_compare_p (code, mode, ccp_jump)
5825       && (! FLOAT_MODE_P (mode)
5826           || code == ORDERED || code == UNORDERED
5827           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5828           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5829     {
5830       enum rtx_code rcode;
5831       if (FLOAT_MODE_P (mode))
5832         rcode = reverse_condition_maybe_unordered (code);
5833       else
5834         rcode = reverse_condition (code);
5835
5836       /* Canonicalize to UNORDERED for the libcall.  */
5837       if (can_compare_p (rcode, mode, ccp_jump)
5838           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5839         {
5840           falseval = trueval;
5841           trueval = const0_rtx;
5842           code = rcode;
5843         }
5844     }
5845
5846   emit_move_insn (target, trueval);
5847   label = gen_label_rtx ();
5848   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5849                            NULL_RTX, label, -1);
5850
5851   emit_move_insn (target, falseval);
5852   emit_label (label);
5853
5854   return target;
5855 }
5856 \f
5857 /* Perform possibly multi-word comparison and conditional jump to LABEL
5858    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5859    now a thin wrapper around do_compare_rtx_and_jump.  */
5860
5861 static void
5862 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5863                  rtx_code_label *label)
5864 {
5865   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5866   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5867                            NULL_RTX, NULL_RTX, label, -1);
5868 }