[19/77] Add a smallest_int_mode_for_size helper function
[official-gcc.git] / gcc / expmed.c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2017 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "predict.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "optabs.h"
34 #include "emit-rtl.h"
35 #include "diagnostic-core.h"
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "dojump.h"
39 #include "explow.h"
40 #include "expr.h"
41 #include "langhooks.h"
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 rtx, bool);
53 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 rtx, bool);
56 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 unsigned HOST_WIDE_INT,
59 unsigned HOST_WIDE_INT,
60 rtx, bool);
61 static rtx extract_fixed_bit_field (machine_mode, rtx,
62 unsigned HOST_WIDE_INT,
63 unsigned HOST_WIDE_INT, rtx, int, bool);
64 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
65 unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT, rtx, int, bool);
67 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
68 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
69 unsigned HOST_WIDE_INT, int, bool);
70 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
71 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
72 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
74 /* Return a constant integer mask value of mode MODE with BITSIZE ones
75 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
76 The mask is truncated if necessary to the width of mode MODE. The
77 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
79 static inline rtx
80 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
82 return immed_wide_int_const
83 (wi::shifted_mask (bitpos, bitsize, complement,
84 GET_MODE_PRECISION (mode)), mode);
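/* For illustration (assuming a 32-bit SImode): mask_rtx (SImode, 3, 4, false)
   builds the constant 0x78, i.e. four one bits starting at bit 3, and
   mask_rtx (SImode, 3, 4, true) builds its SImode complement 0xffffff87.  */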
87 /* Test whether a value is zero or a power of two. */
88 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
89 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
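/* For instance, 8 and 0 satisfy the test while 12 does not, because
   12 & 11 == 8; the macro relies on x & (x - 1) clearing the lowest set
   bit, which leaves zero exactly for powers of two and for zero itself.  */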
91 struct init_expmed_rtl
93 rtx reg;
94 rtx plus;
95 rtx neg;
96 rtx mult;
97 rtx sdiv;
98 rtx udiv;
99 rtx sdiv_32;
100 rtx smod_32;
101 rtx wide_mult;
102 rtx wide_lshr;
103 rtx wide_trunc;
104 rtx shift;
105 rtx shift_mult;
106 rtx shift_add;
107 rtx shift_sub0;
108 rtx shift_sub1;
109 rtx zext;
110 rtx trunc;
112 rtx pow2[MAX_BITS_PER_WORD];
113 rtx cint[MAX_BITS_PER_WORD];
116 static void
117 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
118 machine_mode from_mode, bool speed)
120 int to_size, from_size;
121 rtx which;
123 to_size = GET_MODE_PRECISION (to_mode);
124 from_size = GET_MODE_PRECISION (from_mode);
126 /* Most partial integers have a precision less than the "full"
127 integer mode required for their storage.  If one does not,
128 reduce its bit size by one for the comparison purposes
129 here. */
130 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
131 && pow2p_hwi (to_size))
132 to_size --;
133 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
134 && pow2p_hwi (from_size))
135 from_size --;
137 /* Assume cost of zero-extend and sign-extend is the same. */
138 which = (to_size < from_size ? all->trunc : all->zext);
140 PUT_MODE (all->reg, from_mode);
141 set_convert_cost (to_mode, from_mode, speed,
142 set_src_cost (which, to_mode, speed));
145 static void
146 init_expmed_one_mode (struct init_expmed_rtl *all,
147 machine_mode mode, int speed)
149 int m, n, mode_bitsize;
150 machine_mode mode_from;
152 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
154 PUT_MODE (all->reg, mode);
155 PUT_MODE (all->plus, mode);
156 PUT_MODE (all->neg, mode);
157 PUT_MODE (all->mult, mode);
158 PUT_MODE (all->sdiv, mode);
159 PUT_MODE (all->udiv, mode);
160 PUT_MODE (all->sdiv_32, mode);
161 PUT_MODE (all->smod_32, mode);
162 PUT_MODE (all->wide_trunc, mode);
163 PUT_MODE (all->shift, mode);
164 PUT_MODE (all->shift_mult, mode);
165 PUT_MODE (all->shift_add, mode);
166 PUT_MODE (all->shift_sub0, mode);
167 PUT_MODE (all->shift_sub1, mode);
168 PUT_MODE (all->zext, mode);
169 PUT_MODE (all->trunc, mode);
171 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
172 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
173 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
174 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
175 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
177 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
178 <= 2 * add_cost (speed, mode)));
179 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
180 <= 4 * add_cost (speed, mode)));
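/* Put differently, a signed division by 32 is treated as cheap when its
   rtx cost is at most two additions, and the corresponding modulus when it
   is at most four; presumably these thresholds approximate the cost of the
   shift-based expansions in expand_sdiv_pow2 and expand_smod_pow2.  */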
182 set_shift_cost (speed, mode, 0, 0);
184 int cost = add_cost (speed, mode);
185 set_shiftadd_cost (speed, mode, 0, cost);
186 set_shiftsub0_cost (speed, mode, 0, cost);
187 set_shiftsub1_cost (speed, mode, 0, cost);
190 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
191 for (m = 1; m < n; m++)
193 XEXP (all->shift, 1) = all->cint[m];
194 XEXP (all->shift_mult, 1) = all->pow2[m];
196 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
197 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
198 speed));
199 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
200 speed));
201 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
202 speed));
205 if (SCALAR_INT_MODE_P (mode))
207 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
208 mode_from = (machine_mode)(mode_from + 1))
209 init_expmed_one_conv (all, mode, mode_from, speed);
211 machine_mode wider_mode;
212 if (GET_MODE_CLASS (mode) == MODE_INT
213 && GET_MODE_WIDER_MODE (mode).exists (&wider_mode))
215 PUT_MODE (all->zext, wider_mode);
216 PUT_MODE (all->wide_mult, wider_mode);
217 PUT_MODE (all->wide_lshr, wider_mode);
218 XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
220 set_mul_widen_cost (speed, wider_mode,
221 set_src_cost (all->wide_mult, wider_mode, speed));
222 set_mul_highpart_cost (speed, mode,
223 set_src_cost (all->wide_trunc, mode, speed));
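/* The nested rtxes above form
   (truncate:MODE (lshiftrt:WIDER (mult:WIDER (zext X) (zext Y)) MODE_BITSIZE)),
   i.e. a widening multiply of which only the high half is kept, which is
   why the cost of WIDE_TRUNC is recorded as the mul_highpart cost.  */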
228 void
229 init_expmed (void)
231 struct init_expmed_rtl all;
232 machine_mode mode = QImode;
233 int m, speed;
235 memset (&all, 0, sizeof all);
236 for (m = 1; m < MAX_BITS_PER_WORD; m++)
238 all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
239 all.cint[m] = GEN_INT (m);
242 /* Avoid using hard regs in ways which may be unsupported. */
243 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
244 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
245 all.neg = gen_rtx_NEG (mode, all.reg);
246 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
247 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
248 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
249 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
250 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
251 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
252 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
253 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
254 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
255 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
256 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
257 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
258 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
259 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
260 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
262 for (speed = 0; speed < 2; speed++)
264 crtl->maybe_hot_insn_p = speed;
265 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
267 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
268 mode = (machine_mode)(mode + 1))
269 init_expmed_one_mode (&all, mode, speed);
271 if (MIN_MODE_PARTIAL_INT != VOIDmode)
272 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
273 mode = (machine_mode)(mode + 1))
274 init_expmed_one_mode (&all, mode, speed);
276 if (MIN_MODE_VECTOR_INT != VOIDmode)
277 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
278 mode = (machine_mode)(mode + 1))
279 init_expmed_one_mode (&all, mode, speed);
282 if (alg_hash_used_p ())
284 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
285 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
287 else
288 set_alg_hash_used_p (true);
289 default_rtl_profile ();
291 ggc_free (all.trunc);
292 ggc_free (all.shift_sub1);
293 ggc_free (all.shift_sub0);
294 ggc_free (all.shift_add);
295 ggc_free (all.shift_mult);
296 ggc_free (all.shift);
297 ggc_free (all.wide_trunc);
298 ggc_free (all.wide_lshr);
299 ggc_free (all.wide_mult);
300 ggc_free (all.zext);
301 ggc_free (all.smod_32);
302 ggc_free (all.sdiv_32);
303 ggc_free (all.udiv);
304 ggc_free (all.sdiv);
305 ggc_free (all.mult);
306 ggc_free (all.neg);
307 ggc_free (all.plus);
308 ggc_free (all.reg);
311 /* Return an rtx representing minus the value of X.
312 MODE is the intended mode of the result,
313 useful if X is a CONST_INT. */
316 negate_rtx (machine_mode mode, rtx x)
318 rtx result = simplify_unary_operation (NEG, mode, x, mode);
320 if (result == 0)
321 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
323 return result;
326 /* Whether reverse storage order is supported on the target. */
327 static int reverse_storage_order_supported = -1;
329 /* Check whether reverse storage order is supported on the target. */
331 static void
332 check_reverse_storage_order_support (void)
334 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
336 reverse_storage_order_supported = 0;
337 sorry ("reverse scalar storage order");
339 else
340 reverse_storage_order_supported = 1;
343 /* Whether reverse FP storage order is supported on the target. */
344 static int reverse_float_storage_order_supported = -1;
346 /* Check whether reverse FP storage order is supported on the target. */
348 static void
349 check_reverse_float_storage_order_support (void)
351 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
353 reverse_float_storage_order_supported = 0;
354 sorry ("reverse floating-point scalar storage order");
356 else
357 reverse_float_storage_order_supported = 1;
360 /* Return an rtx representing the value of X with reverse storage order.
361 MODE is the intended mode of the result,
362 useful if X is a CONST_INT. */
365 flip_storage_order (machine_mode mode, rtx x)
367 scalar_int_mode int_mode;
368 rtx result;
370 if (mode == QImode)
371 return x;
373 if (COMPLEX_MODE_P (mode))
375 rtx real = read_complex_part (x, false);
376 rtx imag = read_complex_part (x, true);
378 real = flip_storage_order (GET_MODE_INNER (mode), real);
379 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
381 return gen_rtx_CONCAT (mode, real, imag);
384 if (__builtin_expect (reverse_storage_order_supported < 0, 0))
385 check_reverse_storage_order_support ();
387 if (!is_a <scalar_int_mode> (mode, &int_mode))
389 if (FLOAT_MODE_P (mode)
390 && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
391 check_reverse_float_storage_order_support ();
393 if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode))
395 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
396 return x;
398 x = gen_lowpart (int_mode, x);
401 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
402 if (result == 0)
403 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
405 if (int_mode != mode)
406 result = gen_lowpart (mode, result);
408 return result;
411 /* Adjust bitfield memory MEM so that it points to the first unit of mode
412 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
413 If MODE is BLKmode, return a reference to every byte in the bitfield.
414 Set *NEW_BITNUM to the bit position of the field within the new memory. */
416 static rtx
417 narrow_bit_field_mem (rtx mem, machine_mode mode,
418 unsigned HOST_WIDE_INT bitsize,
419 unsigned HOST_WIDE_INT bitnum,
420 unsigned HOST_WIDE_INT *new_bitnum)
422 if (mode == BLKmode)
424 *new_bitnum = bitnum % BITS_PER_UNIT;
425 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
426 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
427 / BITS_PER_UNIT);
428 return adjust_bitfield_address_size (mem, mode, offset, size);
430 else
432 unsigned int unit = GET_MODE_BITSIZE (mode);
433 *new_bitnum = bitnum % unit;
434 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
435 return adjust_bitfield_address (mem, mode, offset);
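/* Worked example, assuming 8-bit units: for BITSIZE == 5 and BITNUM == 37,
   narrowing to HImode sets *NEW_BITNUM to 37 % 16 == 5 and advances the
   address by (37 - 5) / 8 == 4 bytes, while the BLKmode path sets
   *NEW_BITNUM to 37 % 8 == 5 with offset 37 / 8 == 4 and a size of
   (5 + 5 + 7) / 8 == 2 bytes.  */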
439 /* The caller wants to perform insertion or extraction PATTERN on a
440 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
441 BITREGION_START and BITREGION_END are as for store_bit_field
442 and FIELDMODE is the natural mode of the field.
444 Search for a mode that is compatible with the memory access
445 restrictions and (where applicable) with a register insertion or
446 extraction. Return the new memory on success, storing the adjusted
447 bit position in *NEW_BITNUM. Return null otherwise. */
449 static rtx
450 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
451 rtx op0, HOST_WIDE_INT bitsize,
452 HOST_WIDE_INT bitnum,
453 unsigned HOST_WIDE_INT bitregion_start,
454 unsigned HOST_WIDE_INT bitregion_end,
455 machine_mode fieldmode,
456 unsigned HOST_WIDE_INT *new_bitnum)
458 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
459 bitregion_end, MEM_ALIGN (op0),
460 MEM_VOLATILE_P (op0));
461 machine_mode best_mode;
462 if (iter.next_mode (&best_mode))
464 /* We can use a memory in BEST_MODE. See whether this is true for
465 any wider modes. All other things being equal, we prefer to
466 use the widest mode possible because it tends to expose more
467 CSE opportunities. */
468 if (!iter.prefer_smaller_modes ())
470 /* Limit the search to the mode required by the corresponding
471 register insertion or extraction instruction, if any. */
472 machine_mode limit_mode = word_mode;
473 extraction_insn insn;
474 if (get_best_reg_extraction_insn (&insn, pattern,
475 GET_MODE_BITSIZE (best_mode),
476 fieldmode))
477 limit_mode = insn.field_mode;
479 machine_mode wider_mode;
480 while (iter.next_mode (&wider_mode)
481 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
482 best_mode = wider_mode;
484 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
485 new_bitnum);
487 return NULL_RTX;
490 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
491 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
492 offset is then BITNUM / BITS_PER_UNIT. */
494 static bool
495 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
496 unsigned HOST_WIDE_INT bitsize,
497 machine_mode struct_mode)
499 if (BYTES_BIG_ENDIAN)
500 return (bitnum % BITS_PER_UNIT == 0
501 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
502 || (bitnum + bitsize) % BITS_PER_WORD == 0));
503 else
504 return bitnum % BITS_PER_WORD == 0;
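/* For example, assuming 8-bit bytes and 32-bit words: on a little-endian
   target any field starting at BITNUM 0, 32, 64, ... is a lowpart, whereas
   on a big-endian target a 16-bit field at BITNUM 16 of an SImode structure
   qualifies because it ends at the mode boundary, giving a subreg byte
   offset of 16 / 8 == 2.  */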
507 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
508 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
509 Return false if the access would touch memory outside the range
510 BITREGION_START to BITREGION_END for conformance to the C++ memory
511 model. */
513 static bool
514 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
515 unsigned HOST_WIDE_INT bitnum,
516 machine_mode fieldmode,
517 unsigned HOST_WIDE_INT bitregion_start,
518 unsigned HOST_WIDE_INT bitregion_end)
520 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
522 /* -fstrict-volatile-bitfields must be enabled and we must have a
523 volatile MEM. */
524 if (!MEM_P (op0)
525 || !MEM_VOLATILE_P (op0)
526 || flag_strict_volatile_bitfields <= 0)
527 return false;
529 /* Non-integral modes likely only happen with packed structures.
530 Punt. */
531 if (!SCALAR_INT_MODE_P (fieldmode))
532 return false;
534 /* The bit size must not be larger than the field mode, and
535 the field mode must not be larger than a word. */
536 if (bitsize > modesize || modesize > BITS_PER_WORD)
537 return false;
539 /* Check for cases of unaligned fields that must be split. */
540 if (bitnum % modesize + bitsize > modesize)
541 return false;
543 /* The memory must be sufficiently aligned for a MODESIZE access.
544 This condition guarantees that the memory access will not
545 touch anything after the end of the structure. */
546 if (MEM_ALIGN (op0) < modesize)
547 return false;
549 /* Check for cases where the C++ memory model applies. */
550 if (bitregion_end != 0
551 && (bitnum - bitnum % modesize < bitregion_start
552 || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
553 return false;
555 return true;
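/* Illustration, assuming 32-bit SImode and BITS_PER_WORD == 32: a volatile
   3-bit field at BITNUM 4 with SImode FIELDMODE qualifies when the MEM is at
   least 32-bit aligned and the SImode access stays inside the bit region,
   since 4 % 32 + 3 <= 32; the same field at BITNUM 30 would straddle a
   32-bit boundary and be rejected by the split check above.  */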
558 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
559 bit number BITNUM can be treated as a simple value of mode MODE. */
561 static bool
562 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
563 unsigned HOST_WIDE_INT bitnum, machine_mode mode)
565 return (MEM_P (op0)
566 && bitnum % BITS_PER_UNIT == 0
567 && bitsize == GET_MODE_BITSIZE (mode)
568 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
569 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
570 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
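/* For instance, assuming 8-bit units, a 16-bit field starting on a byte
   boundary can be treated as a single HImode value when either unaligned
   HImode accesses are not slow for the MEM's alignment, or both BITNUM and
   the MEM itself satisfy HImode's alignment.  */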
573 /* Try to use instruction INSV to store VALUE into a field of OP0.
574 BITSIZE and BITNUM are as for store_bit_field. */
576 static bool
577 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
578 unsigned HOST_WIDE_INT bitsize,
579 unsigned HOST_WIDE_INT bitnum,
580 rtx value)
582 struct expand_operand ops[4];
583 rtx value1;
584 rtx xop0 = op0;
585 rtx_insn *last = get_last_insn ();
586 bool copy_back = false;
588 machine_mode op_mode = insv->field_mode;
589 unsigned int unit = GET_MODE_BITSIZE (op_mode);
590 if (bitsize == 0 || bitsize > unit)
591 return false;
593 if (MEM_P (xop0))
594 /* Get a reference to the first byte of the field. */
595 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
596 &bitnum);
597 else
599 /* Convert from counting within OP0 to counting in OP_MODE. */
600 if (BYTES_BIG_ENDIAN)
601 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
603 /* If xop0 is a register, we need it in OP_MODE
604 to make it acceptable to the format of insv. */
605 if (GET_CODE (xop0) == SUBREG)
606 /* We can't just change the mode, because this might clobber op0,
607 and we will need the original value of op0 if insv fails. */
608 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
609 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
610 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
613 /* If the destination is a paradoxical subreg such that we need a
614 truncate to the inner mode, perform the insertion on a temporary and
615 truncate the result to the original destination. Note that we can't
616 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
617 X) 0)) is (reg:N X). */
618 if (GET_CODE (xop0) == SUBREG
619 && REG_P (SUBREG_REG (xop0))
620 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
621 op_mode))
623 rtx tem = gen_reg_rtx (op_mode);
624 emit_move_insn (tem, xop0);
625 xop0 = tem;
626 copy_back = true;
629 /* There is a similar overflow check at the start of store_bit_field_1,
630 but it only handles the case where the field lies completely outside
631 the register.  The field can also lie partially within the register,
632 in which case BITSIZE needs to be adjusted for the partial overflow.
633 Without this fix, pr48335-2.c would be broken on big-endian targets
634 that have a bit-insert instruction, such as arm and aarch64.  */
636 if (bitsize + bitnum > unit && bitnum < unit)
638 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
639 "destination object, data truncated into %wu-bit",
640 bitsize, unit - bitnum);
641 bitsize = unit - bitnum;
644 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
645 "backwards" from the size of the unit we are inserting into.
646 Otherwise, we count bits from the most significant on a
647 BYTES/BITS_BIG_ENDIAN machine. */
649 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
650 bitnum = unit - bitsize - bitnum;
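/* For example, when this renumbering applies and UNIT is 32, a 5-bit field
   at BITNUM 4 is passed to the insv pattern as BITNUM 32 - 5 - 4 == 23.  */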
652 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
653 value1 = value;
654 if (GET_MODE (value) != op_mode)
656 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
658 rtx tmp;
659 /* Optimization: Don't bother really extending VALUE
660 if it has all the bits we will actually use. However,
661 if we must narrow it, be sure we do it correctly. */
663 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
665 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
666 if (! tmp)
667 tmp = simplify_gen_subreg (op_mode,
668 force_reg (GET_MODE (value),
669 value1),
670 GET_MODE (value), 0);
672 else
674 tmp = gen_lowpart_if_possible (op_mode, value1);
675 if (! tmp)
676 tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value),
677 value1));
679 value1 = tmp;
681 else if (CONST_INT_P (value))
682 value1 = gen_int_mode (INTVAL (value), op_mode);
683 else
684 /* The parse phase is supposed to make VALUE's data type
685 match that of the component reference, which is a type
686 at least as wide as the field; so VALUE should have
687 a mode that corresponds to that type. */
688 gcc_assert (CONSTANT_P (value));
691 create_fixed_operand (&ops[0], xop0);
692 create_integer_operand (&ops[1], bitsize);
693 create_integer_operand (&ops[2], bitnum);
694 create_input_operand (&ops[3], value1, op_mode);
695 if (maybe_expand_insn (insv->icode, 4, ops))
697 if (copy_back)
698 convert_move (op0, xop0, true);
699 return true;
701 delete_insns_since (last);
702 return false;
705 /* A subroutine of store_bit_field, with the same arguments. Return true
706 if the operation could be implemented.
708 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
709 no other way of implementing the operation. If FALLBACK_P is false,
710 return false instead. */
712 static bool
713 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
714 unsigned HOST_WIDE_INT bitnum,
715 unsigned HOST_WIDE_INT bitregion_start,
716 unsigned HOST_WIDE_INT bitregion_end,
717 machine_mode fieldmode,
718 rtx value, bool reverse, bool fallback_p)
720 rtx op0 = str_rtx;
721 rtx orig_value;
723 while (GET_CODE (op0) == SUBREG)
725 /* The following line once was done only if WORDS_BIG_ENDIAN,
726 but I think that is a mistake. WORDS_BIG_ENDIAN is
727 meaningful at a much higher level; when structures are copied
728 between memory and regs, the higher-numbered regs
729 always get higher addresses. */
730 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
731 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
732 int byte_offset = 0;
734 /* Paradoxical subregs need special handling on big-endian machines. */
735 if (paradoxical_subreg_p (op0))
737 int difference = inner_mode_size - outer_mode_size;
739 if (WORDS_BIG_ENDIAN)
740 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
741 if (BYTES_BIG_ENDIAN)
742 byte_offset += difference % UNITS_PER_WORD;
744 else
745 byte_offset = SUBREG_BYTE (op0);
747 bitnum += byte_offset * BITS_PER_UNIT;
748 op0 = SUBREG_REG (op0);
751 /* No action is needed if the target is a register and if the field
752 lies completely outside that register. This can occur if the source
753 code contains an out-of-bounds access to a small array. */
754 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
755 return true;
757 /* Use vec_set patterns for inserting parts of vectors whenever
758 available. */
759 if (VECTOR_MODE_P (GET_MODE (op0))
760 && !MEM_P (op0)
761 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
762 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
763 && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
764 && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
766 struct expand_operand ops[3];
767 machine_mode outermode = GET_MODE (op0);
768 machine_mode innermode = GET_MODE_INNER (outermode);
769 enum insn_code icode = optab_handler (vec_set_optab, outermode);
770 int pos = bitnum / GET_MODE_BITSIZE (innermode);
772 create_fixed_operand (&ops[0], op0);
773 create_input_operand (&ops[1], value, innermode);
774 create_integer_operand (&ops[2], pos);
775 if (maybe_expand_insn (icode, 3, ops))
776 return true;
779 /* If the target is a register, overwriting the entire object, or storing
780 a full-word or multi-word field can be done with just a SUBREG. */
781 if (!MEM_P (op0)
782 && bitsize == GET_MODE_BITSIZE (fieldmode)
783 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
784 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
786 /* Use the subreg machinery either to narrow OP0 to the required
787 words or to cope with mode punning between equal-sized modes.
788 In the latter case, use subreg on the rhs side, not lhs. */
789 rtx sub;
791 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
793 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
794 if (sub)
796 if (reverse)
797 sub = flip_storage_order (GET_MODE (op0), sub);
798 emit_move_insn (op0, sub);
799 return true;
802 else
804 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
805 bitnum / BITS_PER_UNIT);
806 if (sub)
808 if (reverse)
809 value = flip_storage_order (fieldmode, value);
810 emit_move_insn (sub, value);
811 return true;
816 /* If the target is memory, storing any naturally aligned field can be
817 done with a simple store. For targets that support fast unaligned
818 memory, any naturally sized, unit aligned field can be done directly. */
819 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
821 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
822 if (reverse)
823 value = flip_storage_order (fieldmode, value);
824 emit_move_insn (op0, value);
825 return true;
828 /* Make sure we are playing with integral modes. Pun with subregs
829 if we aren't. This must come after the entire register case above,
830 since that case is valid for any mode. The following cases are only
831 valid for integral modes. */
832 opt_scalar_int_mode opt_imode = int_mode_for_mode (GET_MODE (op0));
833 scalar_int_mode imode;
834 if (!opt_imode.exists (&imode) || imode != GET_MODE (op0))
836 if (MEM_P (op0))
837 op0 = adjust_bitfield_address_size (op0, opt_imode.else_blk (),
838 0, MEM_SIZE (op0));
839 else
840 op0 = gen_lowpart (opt_imode.require (), op0);
843 /* Storing an lsb-aligned field in a register
844 can be done with a movstrict instruction. */
846 if (!MEM_P (op0)
847 && !reverse
848 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
849 && bitsize == GET_MODE_BITSIZE (fieldmode)
850 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
852 struct expand_operand ops[2];
853 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
854 rtx arg0 = op0;
855 unsigned HOST_WIDE_INT subreg_off;
857 if (GET_CODE (arg0) == SUBREG)
859 /* Else we've got some float mode source being extracted into
860 a different float mode destination -- this combination of
861 subregs results in Severe Tire Damage. */
862 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
863 || GET_MODE_CLASS (fieldmode) == MODE_INT
864 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
865 arg0 = SUBREG_REG (arg0);
868 subreg_off = bitnum / BITS_PER_UNIT;
869 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
871 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
873 create_fixed_operand (&ops[0], arg0);
874 /* Shrink the source operand to FIELDMODE. */
875 create_convert_operand_to (&ops[1], value, fieldmode, false);
876 if (maybe_expand_insn (icode, 2, ops))
877 return true;
881 /* Handle fields bigger than a word. */
883 if (bitsize > BITS_PER_WORD)
885 /* Here we transfer the words of the field
886 in the order least significant first.
887 This is because the most significant word is the one which may
888 be less than full.
889 However, only do that if the value is not BLKmode. */
891 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
892 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
893 unsigned int i;
894 rtx_insn *last;
896 /* This is the mode we must force value to, so that there will be enough
897 subwords to extract. Note that fieldmode will often (always?) be
898 VOIDmode, because that is what store_field uses to indicate that this
899 is a bit field, but passing VOIDmode to operand_subword_force
900 is not allowed. */
901 fieldmode = GET_MODE (value);
902 if (fieldmode == VOIDmode)
903 fieldmode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
905 last = get_last_insn ();
906 for (i = 0; i < nwords; i++)
908 /* If I is 0, use the low-order word in both field and target;
909 if I is 1, use the next to lowest word; and so on. */
910 unsigned int wordnum = (backwards
911 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
912 - i - 1
913 : i);
914 unsigned int bit_offset = (backwards ^ reverse
915 ? MAX ((int) bitsize - ((int) i + 1)
916 * BITS_PER_WORD,
917 0)
918 : (int) i * BITS_PER_WORD);
919 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
920 unsigned HOST_WIDE_INT new_bitsize =
921 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
923 /* If the remaining chunk doesn't have a full word size, we have
924 to make sure that on big-endian machines the higher-order
925 bits are used. */
926 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
927 value_word = simplify_expand_binop (word_mode, lshr_optab,
928 value_word,
929 GEN_INT (BITS_PER_WORD
930 - new_bitsize),
931 NULL_RTX, true,
932 OPTAB_LIB_WIDEN);
934 if (!store_bit_field_1 (op0, new_bitsize,
935 bitnum + bit_offset,
936 bitregion_start, bitregion_end,
937 word_mode,
938 value_word, reverse, fallback_p))
940 delete_insns_since (last);
941 return false;
944 return true;
947 /* If VALUE has a floating-point or complex mode, access it as an
948 integer of the corresponding size. This can occur on a machine
949 with 64 bit registers that uses SFmode for float. It can also
950 occur for unaligned float or complex fields. */
951 orig_value = value;
952 if (GET_MODE (value) != VOIDmode
953 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
954 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
956 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)).require ());
957 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
960 /* If OP0 is a multi-word register, narrow it to the affected word.
961 If the region spans two words, defer to store_split_bit_field.
962 Don't do this if op0 is a single hard register wider than word
963 such as a float or vector register. */
964 if (!MEM_P (op0)
965 && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD
966 && (!REG_P (op0)
967 || !HARD_REGISTER_P (op0)
968 || HARD_REGNO_NREGS (REGNO (op0), GET_MODE (op0)) != 1))
970 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
972 if (!fallback_p)
973 return false;
975 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
976 bitregion_end, value, reverse);
977 return true;
979 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
980 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
981 gcc_assert (op0);
982 bitnum %= BITS_PER_WORD;
985 /* From here on we can assume that the field to be stored in fits
986 within a word. If the destination is a register, it too fits
987 in a word. */
989 extraction_insn insv;
990 if (!MEM_P (op0)
991 && !reverse
992 && get_best_reg_extraction_insn (&insv, EP_insv,
993 GET_MODE_BITSIZE (GET_MODE (op0)),
994 fieldmode)
995 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
996 return true;
998 /* If OP0 is a memory, try copying it to a register and seeing if a
999 cheap register alternative is available. */
1000 if (MEM_P (op0) && !reverse)
1002 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1003 fieldmode)
1004 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1005 return true;
1007 rtx_insn *last = get_last_insn ();
1009 /* Try loading part of OP0 into a register, inserting the bitfield
1010 into that, and then copying the result back to OP0. */
1011 unsigned HOST_WIDE_INT bitpos;
1012 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1013 bitregion_start, bitregion_end,
1014 fieldmode, &bitpos);
1015 if (xop0)
1017 rtx tempreg = copy_to_reg (xop0);
1018 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1019 bitregion_start, bitregion_end,
1020 fieldmode, orig_value, reverse, false))
1022 emit_move_insn (xop0, tempreg);
1023 return true;
1025 delete_insns_since (last);
1029 if (!fallback_p)
1030 return false;
1032 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
1033 bitregion_end, value, reverse);
1034 return true;
1037 /* Generate code to store value from rtx VALUE
1038 into a bit-field within structure STR_RTX
1039 containing BITSIZE bits starting at bit BITNUM.
1041 BITREGION_START is bitpos of the first bitfield in this region.
1042 BITREGION_END is the bitpos of the ending bitfield in this region.
1043 These two fields are 0, if the C++ memory model does not apply,
1044 or we are not interested in keeping track of bitfield regions.
1046 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1048 If REVERSE is true, the store is to be done in reverse order. */
1050 void
1051 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1052 unsigned HOST_WIDE_INT bitnum,
1053 unsigned HOST_WIDE_INT bitregion_start,
1054 unsigned HOST_WIDE_INT bitregion_end,
1055 machine_mode fieldmode,
1056 rtx value, bool reverse)
1058 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1059 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
1060 bitregion_start, bitregion_end))
1062 /* Storing of a full word can be done with a simple store.
1063 We know here that the field can be accessed with one single
1064 instruction. For targets that support unaligned memory,
1065 an unaligned access may be necessary. */
1066 if (bitsize == GET_MODE_BITSIZE (fieldmode))
1068 str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
1069 bitnum / BITS_PER_UNIT);
1070 if (reverse)
1071 value = flip_storage_order (fieldmode, value);
1072 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1073 emit_move_insn (str_rtx, value);
1075 else
1077 rtx temp;
1079 str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
1080 &bitnum);
1081 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
1082 temp = copy_to_reg (str_rtx);
1083 if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1084 fieldmode, value, reverse, true))
1085 gcc_unreachable ();
1087 emit_move_insn (str_rtx, temp);
1090 return;
1093 /* Under the C++0x memory model, we must not touch bits outside the
1094 bit region. Adjust the address to start at the beginning of the
1095 bit region. */
1096 if (MEM_P (str_rtx) && bitregion_start > 0)
1098 machine_mode bestmode;
1099 HOST_WIDE_INT offset, size;
1101 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1103 offset = bitregion_start / BITS_PER_UNIT;
1104 bitnum -= bitregion_start;
1105 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1106 bitregion_end -= bitregion_start;
1107 bitregion_start = 0;
1108 bestmode = get_best_mode (bitsize, bitnum,
1109 bitregion_start, bitregion_end,
1110 MEM_ALIGN (str_rtx), VOIDmode,
1111 MEM_VOLATILE_P (str_rtx));
1112 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1115 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1116 bitregion_start, bitregion_end,
1117 fieldmode, value, reverse, true))
1118 gcc_unreachable ();
1121 /* Use shifts and boolean operations to store VALUE into a bit field of
1122 width BITSIZE in OP0, starting at bit BITNUM.
1124 If REVERSE is true, the store is to be done in reverse order. */
1126 static void
1127 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1128 unsigned HOST_WIDE_INT bitnum,
1129 unsigned HOST_WIDE_INT bitregion_start,
1130 unsigned HOST_WIDE_INT bitregion_end,
1131 rtx value, bool reverse)
1133 /* There is a case not handled here:
1134 a structure with a known alignment of just a halfword
1135 and a field split across two aligned halfwords within the structure.
1136 Or likewise a structure with a known alignment of just a byte
1137 and a field split across two bytes.
1138 Such cases are not supposed to be able to occur. */
1140 if (MEM_P (op0))
1142 machine_mode mode = GET_MODE (op0);
1143 if (GET_MODE_BITSIZE (mode) == 0
1144 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1145 mode = word_mode;
1146 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1147 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1149 if (mode == VOIDmode)
1151 /* The only way this should occur is if the field spans word
1152 boundaries. */
1153 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1154 bitregion_end, value, reverse);
1155 return;
1158 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1161 store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
1164 /* Helper function for store_fixed_bit_field, stores
1165 the bit field always using the MODE of OP0. */
1167 static void
1168 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1169 unsigned HOST_WIDE_INT bitnum,
1170 rtx value, bool reverse)
1172 machine_mode mode;
1173 rtx temp;
1174 int all_zero = 0;
1175 int all_one = 0;
1177 mode = GET_MODE (op0);
1178 gcc_assert (SCALAR_INT_MODE_P (mode));
1180 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1181 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1183 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1184 /* BITNUM is the distance between our msb
1185 and that of the containing datum.
1186 Convert it to the distance from the lsb. */
1187 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1189 /* Now BITNUM is always the distance between our lsb
1190 and that of OP0. */
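/* For example, in a 32-bit MODE a 6-bit field whose msb-relative BITNUM
   was 10 is now addressed from the lsb as 32 - 6 - 10 == 16.  */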
1192 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1193 we must first convert its mode to MODE. */
1195 if (CONST_INT_P (value))
1197 unsigned HOST_WIDE_INT v = UINTVAL (value);
1199 if (bitsize < HOST_BITS_PER_WIDE_INT)
1200 v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1202 if (v == 0)
1203 all_zero = 1;
1204 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1205 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1206 || (bitsize == HOST_BITS_PER_WIDE_INT
1207 && v == HOST_WIDE_INT_M1U))
1208 all_one = 1;
1210 value = lshift_value (mode, v, bitnum);
1212 else
1214 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1215 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1217 if (GET_MODE (value) != mode)
1218 value = convert_to_mode (mode, value, 1);
1220 if (must_and)
1221 value = expand_binop (mode, and_optab, value,
1222 mask_rtx (mode, 0, bitsize, 0),
1223 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1224 if (bitnum > 0)
1225 value = expand_shift (LSHIFT_EXPR, mode, value,
1226 bitnum, NULL_RTX, 1);
1229 if (reverse)
1230 value = flip_storage_order (mode, value);
1232 /* Now clear the chosen bits in OP0,
1233 except that if VALUE is -1 we need not bother. */
1234 /* We keep the intermediates in registers to allow CSE to combine
1235 consecutive bitfield assignments. */
1237 temp = force_reg (mode, op0);
1239 if (! all_one)
1241 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1242 if (reverse)
1243 mask = flip_storage_order (mode, mask);
1244 temp = expand_binop (mode, and_optab, temp, mask,
1245 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1246 temp = force_reg (mode, temp);
1249 /* Now logical-or VALUE into OP0, unless it is zero. */
1251 if (! all_zero)
1253 temp = expand_binop (mode, ior_optab, temp, value,
1254 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1255 temp = force_reg (mode, temp);
1258 if (op0 != temp)
1260 op0 = copy_rtx (op0);
1261 emit_move_insn (op0, temp);
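/* A minimal C-level sketch of the clear-then-or sequence above, with purely
   illustrative numbers (a 3-bit field holding the value 5 at bit 8 of a
   32-bit word); the real code emits the equivalent AND/IOR rtl instead.  */
#if 0
static unsigned int
store_fixed_bit_field_example (unsigned int op0)
{
  unsigned int mask = 0x7u << 8;	/* bits 8..10 of the word */
  return (op0 & ~mask) | (5u << 8);	/* clear the field, then insert 5 */
}
#endif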
1265 /* Store a bit field that is split across multiple accessible memory objects.
1267 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1268 BITSIZE is the field width; BITPOS the position of its first bit
1269 (within the word).
1270 VALUE is the value to store.
1272 If REVERSE is true, the store is to be done in reverse order.
1274 This does not yet handle fields wider than BITS_PER_WORD. */
1276 static void
1277 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1278 unsigned HOST_WIDE_INT bitpos,
1279 unsigned HOST_WIDE_INT bitregion_start,
1280 unsigned HOST_WIDE_INT bitregion_end,
1281 rtx value, bool reverse)
1283 unsigned int unit, total_bits, bitsdone = 0;
1285 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1286 much at a time. */
1287 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1288 unit = BITS_PER_WORD;
1289 else
1290 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1292 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1293 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1294 again, and we will mutually recurse forever. */
1295 if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1296 unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1298 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1299 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1300 that VALUE might be a floating-point constant. */
1301 if (CONSTANT_P (value) && !CONST_INT_P (value))
1303 rtx word = gen_lowpart_common (word_mode, value);
1305 if (word && (value != word))
1306 value = word;
1307 else
1308 value = gen_lowpart_common (word_mode,
1309 force_reg (GET_MODE (value) != VOIDmode
1310 ? GET_MODE (value)
1311 : word_mode, value));
1314 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1316 while (bitsdone < bitsize)
1318 unsigned HOST_WIDE_INT thissize;
1319 unsigned HOST_WIDE_INT thispos;
1320 unsigned HOST_WIDE_INT offset;
1321 rtx part, word;
1323 offset = (bitpos + bitsdone) / unit;
1324 thispos = (bitpos + bitsdone) % unit;
1326 /* When the region of bytes we can touch is restricted, decrease
1327 UNIT close to the end of the region as needed. If op0 is a REG
1328 or SUBREG of REG, don't do this, as there can't be data races
1329 on a register and we can expand shorter code in some cases. */
1330 if (bitregion_end
1331 && unit > BITS_PER_UNIT
1332 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1333 && !REG_P (op0)
1334 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1336 unit = unit / 2;
1337 continue;
1340 /* THISSIZE must not overrun a word boundary. Otherwise,
1341 store_fixed_bit_field will call us again, and we will mutually
1342 recurse forever. */
1343 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1344 thissize = MIN (thissize, unit - thispos);
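/* For instance, storing a 10-bit field at BITPOS 30 with UNIT == 32 is done
   in two pieces: first THISSIZE = MIN (10, 32 - 30) == 2 bits at THISPOS 30,
   then the remaining 8 bits at THISPOS 0 of the following unit.  */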
1346 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1348 /* Fetch successively less significant portions. */
1349 if (CONST_INT_P (value))
1350 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1351 >> (bitsize - bitsdone - thissize))
1352 & ((HOST_WIDE_INT_1 << thissize) - 1));
1353 /* Likewise, but the source is little-endian. */
1354 else if (reverse)
1355 part = extract_fixed_bit_field (word_mode, value, thissize,
1356 bitsize - bitsdone - thissize,
1357 NULL_RTX, 1, false);
1358 else
1360 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1361 /* The args are chosen so that the last part includes the
1362 lsb. Give extract_bit_field the value it needs (with
1363 endianness compensation) to fetch the piece we want. */
1364 part = extract_fixed_bit_field (word_mode, value, thissize,
1365 total_bits - bitsize + bitsdone,
1366 NULL_RTX, 1, false);
1369 else
1371 /* Fetch successively more significant portions. */
1372 if (CONST_INT_P (value))
1373 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1374 >> bitsdone)
1375 & ((HOST_WIDE_INT_1 << thissize) - 1));
1376 /* Likewise, but the source is big-endian. */
1377 else if (reverse)
1378 part = extract_fixed_bit_field (word_mode, value, thissize,
1379 total_bits - bitsdone - thissize,
1380 NULL_RTX, 1, false);
1381 else
1382 part = extract_fixed_bit_field (word_mode, value, thissize,
1383 bitsdone, NULL_RTX, 1, false);
1386 /* If OP0 is a register, then handle OFFSET here. */
1387 if (SUBREG_P (op0) || REG_P (op0))
1389 machine_mode op0_mode = GET_MODE (op0);
1390 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1391 word = offset ? const0_rtx : op0;
1392 else
1393 word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1394 GET_MODE (op0));
1395 offset &= BITS_PER_WORD / unit - 1;
1397 else
1398 word = op0;
1400 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1401 it is just an out-of-bounds access. Ignore it. */
1402 if (word != const0_rtx)
1403 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1404 bitregion_start, bitregion_end, part,
1405 reverse);
1406 bitsdone += thissize;
1410 /* A subroutine of extract_bit_field_1 that converts return value X
1411 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1412 to extract_bit_field. */
1414 static rtx
1415 convert_extracted_bit_field (rtx x, machine_mode mode,
1416 machine_mode tmode, bool unsignedp)
1418 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1419 return x;
1421 /* If the x mode is not a scalar integral, first convert to the
1422 integer mode of that size and then access it as a floating-point
1423 value via a SUBREG. */
1424 if (!SCALAR_INT_MODE_P (tmode))
1426 scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1427 x = convert_to_mode (int_mode, x, unsignedp);
1428 x = force_reg (int_mode, x);
1429 return gen_lowpart (tmode, x);
1432 return convert_to_mode (tmode, x, unsignedp);
1435 /* Try to use an ext(z)v pattern to extract a field from OP0.
1436 Return the extracted value on success, otherwise return null.
1437 EXT_MODE is the mode of the extraction and the other arguments
1438 are as for extract_bit_field. */
1440 static rtx
1441 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1442 unsigned HOST_WIDE_INT bitsize,
1443 unsigned HOST_WIDE_INT bitnum,
1444 int unsignedp, rtx target,
1445 machine_mode mode, machine_mode tmode)
1447 struct expand_operand ops[4];
1448 rtx spec_target = target;
1449 rtx spec_target_subreg = 0;
1450 machine_mode ext_mode = extv->field_mode;
1451 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1453 if (bitsize == 0 || unit < bitsize)
1454 return NULL_RTX;
1456 if (MEM_P (op0))
1457 /* Get a reference to the first byte of the field. */
1458 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1459 &bitnum);
1460 else
1462 /* Convert from counting within OP0 to counting in EXT_MODE. */
1463 if (BYTES_BIG_ENDIAN)
1464 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1466 /* If op0 is a register, we need it in EXT_MODE to make it
1467 acceptable to the format of ext(z)v. */
1468 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1469 return NULL_RTX;
1470 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1471 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1474 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1475 "backwards" from the size of the unit we are extracting from.
1476 Otherwise, we count bits from the most significant on a
1477 BYTES/BITS_BIG_ENDIAN machine. */
1479 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1480 bitnum = unit - bitsize - bitnum;
1482 if (target == 0)
1483 target = spec_target = gen_reg_rtx (tmode);
1485 if (GET_MODE (target) != ext_mode)
1487 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1488 between the mode of the extraction (word_mode) and the target
1489 mode. Instead, create a temporary and use convert_move to set
1490 the target. */
1491 if (REG_P (target)
1492 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1494 target = gen_lowpart (ext_mode, target);
1495 if (GET_MODE_PRECISION (ext_mode)
1496 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1497 spec_target_subreg = target;
1499 else
1500 target = gen_reg_rtx (ext_mode);
1503 create_output_operand (&ops[0], target, ext_mode);
1504 create_fixed_operand (&ops[1], op0);
1505 create_integer_operand (&ops[2], bitsize);
1506 create_integer_operand (&ops[3], bitnum);
1507 if (maybe_expand_insn (extv->icode, 4, ops))
1509 target = ops[0].value;
1510 if (target == spec_target)
1511 return target;
1512 if (target == spec_target_subreg)
1513 return spec_target;
1514 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1516 return NULL_RTX;
1519 /* A subroutine of extract_bit_field, with the same arguments.
1520 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1521 if we can find no other means of implementing the operation.
1522 If FALLBACK_P is false, return NULL instead. */
1524 static rtx
1525 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1526 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1527 machine_mode mode, machine_mode tmode,
1528 bool reverse, bool fallback_p, rtx *alt_rtl)
1530 rtx op0 = str_rtx;
1531 machine_mode mode1;
1533 if (tmode == VOIDmode)
1534 tmode = mode;
1536 while (GET_CODE (op0) == SUBREG)
1538 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1539 op0 = SUBREG_REG (op0);
1542 /* If we have an out-of-bounds access to a register, just return an
1543 uninitialized register of the required mode. This can occur if the
1544 source code contains an out-of-bounds access to a small array. */
1545 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1546 return gen_reg_rtx (tmode);
1548 if (REG_P (op0)
1549 && mode == GET_MODE (op0)
1550 && bitnum == 0
1551 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1553 if (reverse)
1554 op0 = flip_storage_order (mode, op0);
1555 /* We're trying to extract a full register from itself. */
1556 return op0;
1559 /* First try to check for vector from vector extractions. */
1560 if (VECTOR_MODE_P (GET_MODE (op0))
1561 && !MEM_P (op0)
1562 && VECTOR_MODE_P (tmode)
1563 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (tmode))
1565 machine_mode new_mode = GET_MODE (op0);
1566 if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1568 new_mode = mode_for_vector (GET_MODE_INNER (tmode),
1569 GET_MODE_BITSIZE (GET_MODE (op0))
1570 / GET_MODE_UNIT_BITSIZE (tmode));
1571 if (!VECTOR_MODE_P (new_mode)
1572 || GET_MODE_SIZE (new_mode) != GET_MODE_SIZE (GET_MODE (op0))
1573 || GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)
1574 || !targetm.vector_mode_supported_p (new_mode))
1575 new_mode = VOIDmode;
1577 if (new_mode != VOIDmode
1578 && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1579 != CODE_FOR_nothing)
1580 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (tmode)
1581 == bitnum / GET_MODE_BITSIZE (tmode)))
1583 struct expand_operand ops[3];
1584 machine_mode outermode = new_mode;
1585 machine_mode innermode = tmode;
1586 enum insn_code icode
1587 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1588 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1590 if (new_mode != GET_MODE (op0))
1591 op0 = gen_lowpart (new_mode, op0);
1592 create_output_operand (&ops[0], target, innermode);
1593 ops[0].target = 1;
1594 create_input_operand (&ops[1], op0, outermode);
1595 create_integer_operand (&ops[2], pos);
1596 if (maybe_expand_insn (icode, 3, ops))
1598 if (alt_rtl && ops[0].target)
1599 *alt_rtl = target;
1600 target = ops[0].value;
1601 if (GET_MODE (target) != mode)
1602 return gen_lowpart (tmode, target);
1603 return target;
1608 /* See if we can get a better vector mode before extracting. */
1609 if (VECTOR_MODE_P (GET_MODE (op0))
1610 && !MEM_P (op0)
1611 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1613 machine_mode new_mode;
1615 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1616 new_mode = MIN_MODE_VECTOR_FLOAT;
1617 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1618 new_mode = MIN_MODE_VECTOR_FRACT;
1619 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1620 new_mode = MIN_MODE_VECTOR_UFRACT;
1621 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1622 new_mode = MIN_MODE_VECTOR_ACCUM;
1623 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1624 new_mode = MIN_MODE_VECTOR_UACCUM;
1625 else
1626 new_mode = MIN_MODE_VECTOR_INT;
1628 FOR_EACH_MODE_FROM (new_mode, new_mode)
1629 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1630 && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode)
1631 && targetm.vector_mode_supported_p (new_mode))
1632 break;
1633 if (new_mode != VOIDmode)
1634 op0 = gen_lowpart (new_mode, op0);
1637 /* Use vec_extract patterns for extracting parts of vectors whenever
1638 available. */
1639 if (VECTOR_MODE_P (GET_MODE (op0))
1640 && !MEM_P (op0)
1641 && (convert_optab_handler (vec_extract_optab, GET_MODE (op0),
1642 GET_MODE_INNER (GET_MODE (op0)))
1643 != CODE_FOR_nothing)
1644 && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1645 == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1647 struct expand_operand ops[3];
1648 machine_mode outermode = GET_MODE (op0);
1649 machine_mode innermode = GET_MODE_INNER (outermode);
1650 enum insn_code icode
1651 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1652 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1654 create_output_operand (&ops[0], target, innermode);
1655 ops[0].target = 1;
1656 create_input_operand (&ops[1], op0, outermode);
1657 create_integer_operand (&ops[2], pos);
1658 if (maybe_expand_insn (icode, 3, ops))
1660 if (alt_rtl && ops[0].target)
1661 *alt_rtl = target;
1662 target = ops[0].value;
1663 if (GET_MODE (target) != mode)
1664 return gen_lowpart (tmode, target);
1665 return target;
1669 /* Make sure we are playing with integral modes. Pun with subregs
1670 if we aren't. */
1671 opt_scalar_int_mode opt_imode = int_mode_for_mode (GET_MODE (op0));
1672 scalar_int_mode imode;
1673 if (!opt_imode.exists (&imode) || imode != GET_MODE (op0))
1675 if (MEM_P (op0))
1676 op0 = adjust_bitfield_address_size (op0, opt_imode.else_blk (),
1677 0, MEM_SIZE (op0));
1678 else if (opt_imode.exists (&imode))
1680 op0 = gen_lowpart (imode, op0);
1682 /* If we got a SUBREG, force it into a register since we
1683 aren't going to be able to do another SUBREG on it. */
1684 if (GET_CODE (op0) == SUBREG)
1685 op0 = force_reg (imode, op0);
1687 else
1689 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1690 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1691 emit_move_insn (mem, op0);
1692 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1696 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1697 If that's wrong, the solution is to test for it and set TARGET to 0
1698 if needed. */
1700 /* Get the mode of the field to use for atomic access or subreg
1701 conversion. */
1702 mode1 = mode;
1703 if (SCALAR_INT_MODE_P (tmode))
1705 machine_mode try_mode = mode_for_size (bitsize,
1706 GET_MODE_CLASS (tmode), 0);
1707 if (try_mode != BLKmode)
1708 mode1 = try_mode;
1710 gcc_assert (mode1 != BLKmode);
1712 /* Extraction of a full MODE1 value can be done with a subreg as long
1713 as the least significant bit of the value is the least significant
1714 bit of either OP0 or a word of OP0. */
1715 if (!MEM_P (op0)
1716 && !reverse
1717 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1718 && bitsize == GET_MODE_BITSIZE (mode1)
1719 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1721 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1722 bitnum / BITS_PER_UNIT);
1723 if (sub)
1724 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1727 /* Extraction of a full MODE1 value can be done with a load as long as
1728 the field is on a byte boundary and is sufficiently aligned. */
1729 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1731 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1732 if (reverse)
1733 op0 = flip_storage_order (mode1, op0);
1734 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1737 /* Handle fields bigger than a word. */
1739 if (bitsize > BITS_PER_WORD)
1741 /* Here we transfer the words of the field
1742 in the order least significant first.
1743 This is because the most significant word is the one which may
1744 be less than full. */
1746 const bool backwards = WORDS_BIG_ENDIAN;
1747 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1748 unsigned int i;
1749 rtx_insn *last;
1751 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1752 target = gen_reg_rtx (mode);
1754 /* In case we're about to clobber a base register or something
1755 (see gcc.c-torture/execute/20040625-1.c). */
1756 if (reg_mentioned_p (target, str_rtx))
1757 target = gen_reg_rtx (mode);
1759 /* Indicate for flow that the entire target reg is being set. */
1760 emit_clobber (target);
1762 last = get_last_insn ();
1763 for (i = 0; i < nwords; i++)
1765 /* If I is 0, use the low-order word in both field and target;
1766 if I is 1, use the next to lowest word; and so on. */
1767 /* Word number in TARGET to use. */
1768 unsigned int wordnum
1769 = (backwards
1770 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1771 : i);
1772 /* Offset from start of field in OP0. */
1773 unsigned int bit_offset = (backwards ^ reverse
1774 ? MAX ((int) bitsize - ((int) i + 1)
1775 * BITS_PER_WORD,
1776 0)
1777 : (int) i * BITS_PER_WORD);
1778 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1779 rtx result_part
1780 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1781 bitsize - i * BITS_PER_WORD),
1782 bitnum + bit_offset, 1, target_part,
1783 mode, word_mode, reverse, fallback_p, NULL);
1785 gcc_assert (target_part);
1786 if (!result_part)
1788 delete_insns_since (last);
1789 return NULL;
1792 if (result_part != target_part)
1793 emit_move_insn (target_part, result_part);
1796 if (unsignedp)
1798 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1799 need to be zero'd out. */
1800 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1802 unsigned int i, total_words;
1804 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1805 for (i = nwords; i < total_words; i++)
1806 emit_move_insn
1807 (operand_subword (target,
1808 backwards ? total_words - i - 1 : i,
1809 1, VOIDmode),
1810 const0_rtx);
1812 return target;
1815 /* Signed bit field: sign-extend with two arithmetic shifts. */
1816 target = expand_shift (LSHIFT_EXPR, mode, target,
1817 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1818 return expand_shift (RSHIFT_EXPR, mode, target,
1819 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1822 /* If OP0 is a multi-word register, narrow it to the affected word.
1823 If the region spans two words, defer to extract_split_bit_field. */
1824 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1826 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1828 if (!fallback_p)
1829 return NULL_RTX;
1830 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1831 reverse);
1832 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1834 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1835 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1836 bitnum %= BITS_PER_WORD;
1839 /* From here on we know the desired field is smaller than a word.
1840 If OP0 is a register, it too fits within a word. */
1841 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1842 extraction_insn extv;
1843 if (!MEM_P (op0)
1844 && !reverse
1845 /* ??? We could limit the structure size to the part of OP0 that
1846 contains the field, with appropriate checks for endianness
1847 and TRULY_NOOP_TRUNCATION. */
1848 && get_best_reg_extraction_insn (&extv, pattern,
1849 GET_MODE_BITSIZE (GET_MODE (op0)),
1850 tmode))
1852 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1853 unsignedp, target, mode,
1854 tmode);
1855 if (result)
1856 return result;
1859 /* If OP0 is a memory, try copying it to a register and seeing if a
1860 cheap register alternative is available. */
1861 if (MEM_P (op0) && !reverse)
1863 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1864 tmode))
1866 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1867 bitnum, unsignedp,
1868 target, mode,
1869 tmode);
1870 if (result)
1871 return result;
1874 rtx_insn *last = get_last_insn ();
1876 /* Try loading part of OP0 into a register and extracting the
1877 bitfield from that. */
1878 unsigned HOST_WIDE_INT bitpos;
1879 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1880 0, 0, tmode, &bitpos);
1881 if (xop0)
1883 xop0 = copy_to_reg (xop0);
1884 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1885 unsignedp, target,
1886 mode, tmode, reverse, false, NULL);
1887 if (result)
1888 return result;
1889 delete_insns_since (last);
1893 if (!fallback_p)
1894 return NULL;
1896 /* Find a correspondingly-sized integer field, so we can apply
1897 shifts and masks to it. */
1898 scalar_int_mode int_mode;
1899 if (!int_mode_for_mode (tmode).exists (&int_mode))
1900 /* If this fails, we should probably push op0 out to memory and then
1901 do a load. */
1902 int_mode = int_mode_for_mode (mode).require ();
1904 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target,
1905 unsignedp, reverse);
1907 /* Complex values must be reversed piecewise, so we need to undo the global
1908 reversal, convert to the complex mode and reverse again. */
1909 if (reverse && COMPLEX_MODE_P (tmode))
1911 target = flip_storage_order (int_mode, target);
1912 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1913 target = flip_storage_order (tmode, target);
1915 else
1916 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1918 return target;
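/* Purely illustrative, standalone sketch (plain C, not rtl; the helper
   name is made up) of the "sign-extend with two shifts" idiom used by
   the signed case above: shift the BITSIZE-bit field's msb up to the
   word's msb, then shift back down arithmetically so the sign bit is
   replicated.  Assumes a two's-complement target, as the rtl expansion
   above does.  */

static long
sketch_sign_extend_low_bits (unsigned long val, int bitsize)
{
  int shift = (int) (sizeof (unsigned long) * 8) - bitsize;
  return ((long) (val << shift)) >> shift;
}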
1921 /* Generate code to extract a bit-field from STR_RTX
1922 containing BITSIZE bits, starting at BITNUM,
1923 and put it in TARGET if possible (if TARGET is nonzero).
1924 Regardless of TARGET, we return the rtx for where the value is placed.
1926 STR_RTX is the structure containing the byte (a REG or MEM).
1927 UNSIGNEDP is nonzero if this is an unsigned bit field.
1928 MODE is the natural mode of the field value once extracted.
1929 TMODE is the mode the caller would like the value to have;
1930 but the value may be returned with type MODE instead.
1932 If REVERSE is true, the extraction is to be done in reverse order.
1934 If a TARGET is specified and we can store in it at no extra cost,
1935 we do so, and return TARGET.
1936 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1937 if they are equally easy. */
1939 rtx
1940 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1941 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1942 machine_mode mode, machine_mode tmode, bool reverse,
1943 rtx *alt_rtl)
1945 machine_mode mode1;
1947 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1948 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1949 mode1 = GET_MODE (str_rtx);
1950 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1951 mode1 = GET_MODE (target);
1952 else
1953 mode1 = tmode;
1955 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1957 /* Extraction of a full MODE1 value can be done with a simple load.
1958 We know here that the field can be accessed with one single
1959 instruction. For targets that support unaligned memory,
1960 an unaligned access may be necessary. */
1961 if (bitsize == GET_MODE_BITSIZE (mode1))
1963 rtx result = adjust_bitfield_address (str_rtx, mode1,
1964 bitnum / BITS_PER_UNIT);
1965 if (reverse)
1966 result = flip_storage_order (mode1, result);
1967 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1968 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1971 str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1972 &bitnum);
1973 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1974 str_rtx = copy_to_reg (str_rtx);
1977 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1978 target, mode, tmode, reverse, true, alt_rtl);
1981 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1982 from bit BITNUM of OP0.
1984 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1985 If REVERSE is true, the extraction is to be done in reverse order.
1987 If TARGET is nonzero, attempts to store the value there
1988 and return TARGET, but this is not guaranteed.
1989 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1991 static rtx
1992 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1993 unsigned HOST_WIDE_INT bitsize,
1994 unsigned HOST_WIDE_INT bitnum, rtx target,
1995 int unsignedp, bool reverse)
1997 if (MEM_P (op0))
1999 machine_mode mode
2000 = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
2001 MEM_VOLATILE_P (op0));
2003 if (mode == VOIDmode)
2004 /* The only way this should occur is if the field spans word
2005 boundaries. */
2006 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
2007 reverse);
2009 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2012 return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
2013 target, unsignedp, reverse);
2016 /* Helper function for extract_fixed_bit_field, extracts
2017 the bit field always using the MODE of OP0. */
2019 static rtx
2020 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
2021 unsigned HOST_WIDE_INT bitsize,
2022 unsigned HOST_WIDE_INT bitnum, rtx target,
2023 int unsignedp, bool reverse)
2025 machine_mode mode = GET_MODE (op0);
2026 gcc_assert (SCALAR_INT_MODE_P (mode));
2028 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2029 for invalid input, such as extract equivalent of f5 from
2030 gcc.dg/pr48335-2.c. */
2032 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2033 /* BITNUM is the distance between our msb and that of OP0.
2034 Convert it to the distance from the lsb. */
2035 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2037 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2038 We have reduced the big-endian case to the little-endian case. */
2039 if (reverse)
2040 op0 = flip_storage_order (mode, op0);
2042 if (unsignedp)
2044 if (bitnum)
2046 /* If the field does not already start at the lsb,
2047 shift it so it does. */
2048 /* Maybe propagate the target for the shift. */
2049 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2050 if (tmode != mode)
2051 subtarget = 0;
2052 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2054 /* Convert the value to the desired mode. */
2055 if (mode != tmode)
2056 op0 = convert_to_mode (tmode, op0, 1);
2058 /* Unless the msb of the field used to be the msb when we shifted,
2059 mask out the upper bits. */
2061 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2062 return expand_binop (GET_MODE (op0), and_optab, op0,
2063 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
2064 target, 1, OPTAB_LIB_WIDEN);
2065 return op0;
2068 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2069 then arithmetic-shift its lsb to the lsb of the word. */
2070 op0 = force_reg (mode, op0);
2072 /* Find the narrowest integer mode that contains the field. */
2074 FOR_EACH_MODE_IN_CLASS (mode, MODE_INT)
2075 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
2077 op0 = convert_to_mode (mode, op0, 0);
2078 break;
2081 if (mode != tmode)
2082 target = 0;
2084 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2086 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2087 /* Maybe propagate the target for the shift. */
2088 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2089 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2092 return expand_shift (RSHIFT_EXPR, mode, op0,
2093 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
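/* Purely illustrative, standalone sketch (plain C, not rtl; the helper
   name is made up) of what the unsigned path above computes: shift the
   field down so it starts at the lsb, then mask off everything above
   BITSIZE bits.  BITNUM is the lsb-relative bit position, as in the
   little-endian case above.  */

static unsigned long
sketch_extract_unsigned_field (unsigned long word, int bitnum, int bitsize)
{
  unsigned long mask
    = (bitsize >= (int) (sizeof (unsigned long) * 8)
       ? ~0UL : (1UL << bitsize) - 1);
  return (word >> bitnum) & mask;
}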
2096 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2097 VALUE << BITPOS. */
2099 static rtx
2100 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2101 int bitpos)
2103 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2106 /* Extract a bit field that is split across two words
2107 and return an RTX for the result.
2109 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2110 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2111 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2113 If REVERSE is true, the extraction is to be done in reverse order. */
2115 static rtx
2116 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2117 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2118 bool reverse)
2120 unsigned int unit;
2121 unsigned int bitsdone = 0;
2122 rtx result = NULL_RTX;
2123 int first = 1;
2125 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
2126 much at a time. */
2127 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2128 unit = BITS_PER_WORD;
2129 else
2130 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2132 while (bitsdone < bitsize)
2134 unsigned HOST_WIDE_INT thissize;
2135 rtx part, word;
2136 unsigned HOST_WIDE_INT thispos;
2137 unsigned HOST_WIDE_INT offset;
2139 offset = (bitpos + bitsdone) / unit;
2140 thispos = (bitpos + bitsdone) % unit;
2142 /* THISSIZE must not overrun a word boundary. Otherwise,
2143 extract_fixed_bit_field will call us again, and we will mutually
2144 recurse forever. */
2145 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2146 thissize = MIN (thissize, unit - thispos);
2148 /* If OP0 is a register, then handle OFFSET here. */
2149 if (SUBREG_P (op0) || REG_P (op0))
2151 word = operand_subword_force (op0, offset, GET_MODE (op0));
2152 offset = 0;
2154 else
2155 word = op0;
2157 /* Extract the parts in bit-counting order,
2158 whose meaning is determined by BYTES_PER_UNIT.
2159 OFFSET is in UNITs, and UNIT is in bits. */
2160 part = extract_fixed_bit_field (word_mode, word, thissize,
2161 offset * unit + thispos, 0, 1, reverse);
2162 bitsdone += thissize;
2164 /* Shift this part into place for the result. */
2165 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2167 if (bitsize != bitsdone)
2168 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2169 bitsize - bitsdone, 0, 1);
2171 else
2173 if (bitsdone != thissize)
2174 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2175 bitsdone - thissize, 0, 1);
2178 if (first)
2179 result = part;
2180 else
2181 /* Combine the parts with bitwise or. This works
2182 because we extracted each part as an unsigned bit field. */
2183 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2184 OPTAB_LIB_WIDEN);
2186 first = 0;
2189 /* Unsigned bit field: we are done. */
2190 if (unsignedp)
2191 return result;
2192 /* Signed bit field: sign-extend with two arithmetic shifts. */
2193 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2194 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2195 return expand_shift (RSHIFT_EXPR, word_mode, result,
2196 BITS_PER_WORD - bitsize, NULL_RTX, 0);
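/* Purely illustrative, standalone sketch (plain C, not rtl; the helper
   name is made up) of the strategy above for a field that crosses one
   32-bit word boundary: extract an unsigned part from each word and
   combine them with inclusive or.  BITPOS counts from bit 0 of WORDS[0]
   using little-endian bit numbering; BITSIZE is at most 64 and the field
   is assumed to span no more than two words.  */

static unsigned long long
sketch_extract_split (const unsigned int *words, unsigned int bitpos,
		      unsigned int bitsize)
{
  const unsigned int *p = words + bitpos / 32;
  unsigned int lsb = bitpos % 32;
  unsigned int lowbits = 32 - lsb;	/* Bits available in the first word.  */
  unsigned long long mask
    = bitsize < 64 ? (1ULL << bitsize) - 1 : ~0ULL;
  unsigned long long part = p[0] >> lsb;

  if (bitsize > lowbits)
    /* Fetch the remaining bits from the next word and or them in,
       just as the loop above does one UNIT at a time.  */
    part |= (unsigned long long) p[1] << lowbits;
  return part & mask;
}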
2199 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2200 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2201 MODE, fill the upper bits with zeros. Fail if the layout of either
2202 mode is unknown (as for CC modes) or if the extraction would involve
2203 unprofitable mode punning. Return the value on success, otherwise
2204 return null.
2206 This is different from gen_lowpart* in these respects:
2208 - the returned value must always be considered an rvalue
2210 - when MODE is wider than SRC_MODE, the extraction involves
2211 a zero extension
2213 - when MODE is smaller than SRC_MODE, the extraction involves
2214 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2216 In other words, this routine performs a computation, whereas the
2217 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2218 operations. */
2220 rtx
2221 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2223 machine_mode int_mode, src_int_mode;
2225 if (mode == src_mode)
2226 return src;
2228 if (CONSTANT_P (src))
2230 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2231 fails, it will happily create (subreg (symbol_ref)) or similar
2232 invalid SUBREGs. */
2233 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2234 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2235 if (ret)
2236 return ret;
2238 if (GET_MODE (src) == VOIDmode
2239 || !validate_subreg (mode, src_mode, src, byte))
2240 return NULL_RTX;
2242 src = force_reg (GET_MODE (src), src);
2243 return gen_rtx_SUBREG (mode, src, byte);
2246 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2247 return NULL_RTX;
2249 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2250 && MODES_TIEABLE_P (mode, src_mode))
2252 rtx x = gen_lowpart_common (mode, src);
2253 if (x)
2254 return x;
2257 if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2258 || !int_mode_for_mode (mode).exists (&int_mode))
2259 return NULL_RTX;
2261 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2262 return NULL_RTX;
2263 if (!MODES_TIEABLE_P (int_mode, mode))
2264 return NULL_RTX;
2266 src = gen_lowpart (src_int_mode, src);
2267 src = convert_modes (int_mode, src_int_mode, src, true);
2268 src = gen_lowpart (mode, src);
2269 return src;
2272 /* Add INC into TARGET. */
2274 void
2275 expand_inc (rtx target, rtx inc)
2277 rtx value = expand_binop (GET_MODE (target), add_optab,
2278 target, inc,
2279 target, 0, OPTAB_LIB_WIDEN);
2280 if (value != target)
2281 emit_move_insn (target, value);
2284 /* Subtract DEC from TARGET. */
2286 void
2287 expand_dec (rtx target, rtx dec)
2289 rtx value = expand_binop (GET_MODE (target), sub_optab,
2290 target, dec,
2291 target, 0, OPTAB_LIB_WIDEN);
2292 if (value != target)
2293 emit_move_insn (target, value);
2296 /* Output a shift instruction for expression code CODE,
2297 with SHIFTED being the rtx for the value to shift,
2298 and AMOUNT the rtx for the amount to shift by.
2299 Store the result in the rtx TARGET, if that is convenient.
2300 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2301 Return the rtx for where the value is.
2302 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2303 in which case 0 is returned. */
2305 static rtx
2306 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2307 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2309 rtx op1, temp = 0;
2310 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2311 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2312 optab lshift_optab = ashl_optab;
2313 optab rshift_arith_optab = ashr_optab;
2314 optab rshift_uns_optab = lshr_optab;
2315 optab lrotate_optab = rotl_optab;
2316 optab rrotate_optab = rotr_optab;
2317 machine_mode op1_mode;
2318 machine_mode scalar_mode = mode;
2319 int attempt;
2320 bool speed = optimize_insn_for_speed_p ();
2322 if (VECTOR_MODE_P (mode))
2323 scalar_mode = GET_MODE_INNER (mode);
2324 op1 = amount;
2325 op1_mode = GET_MODE (op1);
2327 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2328 shift amount is a vector, use the vector/vector shift patterns. */
2329 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2331 lshift_optab = vashl_optab;
2332 rshift_arith_optab = vashr_optab;
2333 rshift_uns_optab = vlshr_optab;
2334 lrotate_optab = vrotl_optab;
2335 rrotate_optab = vrotr_optab;
2338 /* We previously detected shift counts computed by NEGATE_EXPR
2339 and shifted in the other direction, but that does not work
2340 on all machines. */
2342 if (SHIFT_COUNT_TRUNCATED)
2344 if (CONST_INT_P (op1)
2345 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2346 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2347 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2348 % GET_MODE_BITSIZE (scalar_mode));
2349 else if (GET_CODE (op1) == SUBREG
2350 && subreg_lowpart_p (op1)
2351 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2352 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2353 op1 = SUBREG_REG (op1);
2356 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2357 prefer left rotation; if op1 is from bitsize / 2 + 1 to
2358 bitsize - 1, use the other direction of rotate with an amount of
2359 1 .. bitsize / 2 - 1 instead. */
2360 if (rotate
2361 && CONST_INT_P (op1)
2362 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2363 GET_MODE_BITSIZE (scalar_mode) - 1))
2365 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2366 left = !left;
2367 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2370 /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2371 Note that this is not the case for bigger values. For instance, a rotation
2372 of 0x01020304 by 16 bits gives 0x03040102, which is different from
2373 0x04030201 (bswapsi). */
2374 if (rotate
2375 && CONST_INT_P (op1)
2376 && INTVAL (op1) == BITS_PER_UNIT
2377 && GET_MODE_SIZE (scalar_mode) == 2
2378 && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2379 return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2380 unsignedp);
2382 if (op1 == const0_rtx)
2383 return shifted;
2385 /* Check whether it's cheaper to implement a left shift by a constant
2386 bit count as a sequence of additions. */
2387 if (code == LSHIFT_EXPR
2388 && CONST_INT_P (op1)
2389 && INTVAL (op1) > 0
2390 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2391 && INTVAL (op1) < MAX_BITS_PER_WORD
2392 && (shift_cost (speed, mode, INTVAL (op1))
2393 > INTVAL (op1) * add_cost (speed, mode))
2394 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2396 int i;
2397 for (i = 0; i < INTVAL (op1); i++)
2399 temp = force_reg (mode, shifted);
2400 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2401 unsignedp, OPTAB_LIB_WIDEN);
2403 return shifted;
2406 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2408 enum optab_methods methods;
2410 if (attempt == 0)
2411 methods = OPTAB_DIRECT;
2412 else if (attempt == 1)
2413 methods = OPTAB_WIDEN;
2414 else
2415 methods = OPTAB_LIB_WIDEN;
2417 if (rotate)
2419 /* Widening does not work for rotation. */
2420 if (methods == OPTAB_WIDEN)
2421 continue;
2422 else if (methods == OPTAB_LIB_WIDEN)
2424 /* If we have been unable to open-code this by a rotation,
2425 do it as the IOR of two shifts. I.e., to rotate A
2426 by N bits, compute
2427 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2428 where C is the bitsize of A.
2430 It is theoretically possible that the target machine might
2431 not be able to perform either shift and hence we would
2432 be making two libcalls rather than just the one for the
2433 shift (similarly if IOR could not be done). We will allow
2434 this extremely unlikely lossage to avoid complicating the
2435 code below. */
2437 rtx subtarget = target == shifted ? 0 : target;
2438 rtx new_amount, other_amount;
2439 rtx temp1;
2441 new_amount = op1;
2442 if (op1 == const0_rtx)
2443 return shifted;
2444 else if (CONST_INT_P (op1))
2445 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2446 - INTVAL (op1));
2447 else
2449 other_amount
2450 = simplify_gen_unary (NEG, GET_MODE (op1),
2451 op1, GET_MODE (op1));
2452 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2453 other_amount
2454 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2455 gen_int_mode (mask, GET_MODE (op1)));
2458 shifted = force_reg (mode, shifted);
2460 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2461 mode, shifted, new_amount, 0, 1);
2462 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2463 mode, shifted, other_amount,
2464 subtarget, 1);
2465 return expand_binop (mode, ior_optab, temp, temp1, target,
2466 unsignedp, methods);
2469 temp = expand_binop (mode,
2470 left ? lrotate_optab : rrotate_optab,
2471 shifted, op1, target, unsignedp, methods);
2473 else if (unsignedp)
2474 temp = expand_binop (mode,
2475 left ? lshift_optab : rshift_uns_optab,
2476 shifted, op1, target, unsignedp, methods);
2478 /* Do arithmetic shifts.
2479 Also, if we are going to widen the operand, we can just as well
2480 use an arithmetic right-shift instead of a logical one. */
2481 if (temp == 0 && ! rotate
2482 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2484 enum optab_methods methods1 = methods;
2486 /* If trying to widen a log shift to an arithmetic shift,
2487 don't accept an arithmetic shift of the same size. */
2488 if (unsignedp)
2489 methods1 = OPTAB_MUST_WIDEN;
2491 /* Arithmetic shift */
2493 temp = expand_binop (mode,
2494 left ? lshift_optab : rshift_arith_optab,
2495 shifted, op1, target, unsignedp, methods1);
2498 /* We used to try extzv here for logical right shifts, but that was
2499 only useful for one machine, the VAX, and caused poor code
2500 generation there for lshrdi3, so the code was deleted and a
2501 define_expand for lshrsi3 was added to vax.md. */
2504 gcc_assert (temp != NULL_RTX || may_fail);
2505 return temp;
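/* Purely illustrative, standalone sketch (plain C; the helper name is
   made up) of the rotate-as-two-shifts fallback described in the
   OPTAB_LIB_WIDEN case above: rotating an unsigned C-bit value A left
   by N is (A << N) | (A >> ((-N) & (C - 1))), and the (-N) & (C - 1)
   form keeps the right-shift count well defined even for N == 0.  */

static unsigned int
sketch_rotate_left (unsigned int a, unsigned int n)
{
  const unsigned int c = sizeof (unsigned int) * 8;
  n &= c - 1;
  return (a << n) | (a >> ((0 - n) & (c - 1)));
}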
2508 /* Output a shift instruction for expression code CODE,
2509 with SHIFTED being the rtx for the value to shift,
2510 and AMOUNT the amount to shift by.
2511 Store the result in the rtx TARGET, if that is convenient.
2512 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2513 Return the rtx for where the value is. */
2515 rtx
2516 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2517 int amount, rtx target, int unsignedp)
2519 return expand_shift_1 (code, mode,
2520 shifted, GEN_INT (amount), target, unsignedp);
2523 /* Likewise, but return 0 if that cannot be done. */
2525 static rtx
2526 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2527 int amount, rtx target, int unsignedp)
2529 return expand_shift_1 (code, mode,
2530 shifted, GEN_INT (amount), target, unsignedp, true);
2533 /* Output a shift instruction for expression code CODE,
2534 with SHIFTED being the rtx for the value to shift,
2535 and AMOUNT the tree for the amount to shift by.
2536 Store the result in the rtx TARGET, if that is convenient.
2537 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2538 Return the rtx for where the value is. */
2540 rtx
2541 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2542 tree amount, rtx target, int unsignedp)
2544 return expand_shift_1 (code, mode,
2545 shifted, expand_normal (amount), target, unsignedp);
2549 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2550 const struct mult_cost *, machine_mode mode);
2551 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2552 const struct algorithm *, enum mult_variant);
2553 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2554 static rtx extract_high_half (machine_mode, rtx);
2555 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2556 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2557 int, int);
2558 /* Compute and return the best algorithm for multiplying by T.
2559 The algorithm must cost less than cost_limit.
2560 If retval.cost >= COST_LIMIT, no algorithm was found and all
2561 other fields of the returned struct are undefined.
2562 MODE is the machine mode of the multiplication. */
2564 static void
2565 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2566 const struct mult_cost *cost_limit, machine_mode mode)
2568 int m;
2569 struct algorithm *alg_in, *best_alg;
2570 struct mult_cost best_cost;
2571 struct mult_cost new_limit;
2572 int op_cost, op_latency;
2573 unsigned HOST_WIDE_INT orig_t = t;
2574 unsigned HOST_WIDE_INT q;
2575 int maxm, hash_index;
2576 bool cache_hit = false;
2577 enum alg_code cache_alg = alg_zero;
2578 bool speed = optimize_insn_for_speed_p ();
2579 machine_mode imode;
2580 struct alg_hash_entry *entry_ptr;
2582 /* Indicate that no algorithm has been found yet. If none is found,
2583 this value will be returned to indicate failure. */
2584 alg_out->cost.cost = cost_limit->cost + 1;
2585 alg_out->cost.latency = cost_limit->latency + 1;
2587 if (cost_limit->cost < 0
2588 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2589 return;
2591 /* Be prepared for vector modes. */
2592 imode = GET_MODE_INNER (mode);
2594 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2596 /* Restrict the bits of "t" to the multiplication's mode. */
2597 t &= GET_MODE_MASK (imode);
2599 /* t == 1 can be done in zero cost. */
2600 if (t == 1)
2602 alg_out->ops = 1;
2603 alg_out->cost.cost = 0;
2604 alg_out->cost.latency = 0;
2605 alg_out->op[0] = alg_m;
2606 return;
2609 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2610 fail now. */
2611 if (t == 0)
2613 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2614 return;
2615 else
2617 alg_out->ops = 1;
2618 alg_out->cost.cost = zero_cost (speed);
2619 alg_out->cost.latency = zero_cost (speed);
2620 alg_out->op[0] = alg_zero;
2621 return;
2625 /* We'll be needing a couple extra algorithm structures now. */
2627 alg_in = XALLOCA (struct algorithm);
2628 best_alg = XALLOCA (struct algorithm);
2629 best_cost = *cost_limit;
2631 /* Compute the hash index. */
2632 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2634 /* See if we already know what to do for T. */
2635 entry_ptr = alg_hash_entry_ptr (hash_index);
2636 if (entry_ptr->t == t
2637 && entry_ptr->mode == mode
2638 && entry_ptr->speed == speed
2639 && entry_ptr->alg != alg_unknown)
2641 cache_alg = entry_ptr->alg;
2643 if (cache_alg == alg_impossible)
2645 /* The cache tells us that it's impossible to synthesize
2646 multiplication by T within entry_ptr->cost. */
2647 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2648 /* COST_LIMIT is at least as restrictive as the one
2649 recorded in the hash table, in which case we have no
2650 hope of synthesizing a multiplication. Just
2651 return. */
2652 return;
2654 /* If we get here, COST_LIMIT is less restrictive than the
2655 one recorded in the hash table, so we may be able to
2656 synthesize a multiplication. Proceed as if we didn't
2657 have the cache entry. */
2659 else
2661 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2662 /* The cached algorithm shows that this multiplication
2663 requires more cost than COST_LIMIT. Just return. This
2664 way, we don't clobber this cache entry with
2665 alg_impossible but retain useful information. */
2666 return;
2668 cache_hit = true;
2670 switch (cache_alg)
2672 case alg_shift:
2673 goto do_alg_shift;
2675 case alg_add_t_m2:
2676 case alg_sub_t_m2:
2677 goto do_alg_addsub_t_m2;
2679 case alg_add_factor:
2680 case alg_sub_factor:
2681 goto do_alg_addsub_factor;
2683 case alg_add_t2_m:
2684 goto do_alg_add_t2_m;
2686 case alg_sub_t2_m:
2687 goto do_alg_sub_t2_m;
2689 default:
2690 gcc_unreachable ();
2695 /* If we have a group of zero bits at the low-order part of T, try
2696 multiplying by the remaining bits and then doing a shift. */
2698 if ((t & 1) == 0)
2700 do_alg_shift:
2701 m = ctz_or_zero (t); /* m = number of low zero bits */
2702 if (m < maxm)
2704 q = t >> m;
2705 /* The function expand_shift will choose between a shift and
2706 a sequence of additions, so the observed cost is given as
2707 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2708 op_cost = m * add_cost (speed, mode);
2709 if (shift_cost (speed, mode, m) < op_cost)
2710 op_cost = shift_cost (speed, mode, m);
2711 new_limit.cost = best_cost.cost - op_cost;
2712 new_limit.latency = best_cost.latency - op_cost;
2713 synth_mult (alg_in, q, &new_limit, mode);
2715 alg_in->cost.cost += op_cost;
2716 alg_in->cost.latency += op_cost;
2717 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2719 best_cost = alg_in->cost;
2720 std::swap (alg_in, best_alg);
2721 best_alg->log[best_alg->ops] = m;
2722 best_alg->op[best_alg->ops] = alg_shift;
2725 /* See if treating ORIG_T as a signed number yields a better
2726 sequence. Try this sequence only for a negative ORIG_T
2727 as it would be useless for a non-negative ORIG_T. */
2728 if ((HOST_WIDE_INT) orig_t < 0)
2730 /* Shift ORIG_T as follows because a right shift of a
2731 negative-valued signed type is implementation
2732 defined. */
2733 q = ~(~orig_t >> m);
2734 /* The function expand_shift will choose between a shift
2735 and a sequence of additions, so the observed cost is
2736 given as MIN (m * add_cost(speed, mode),
2737 shift_cost(speed, mode, m)). */
2738 op_cost = m * add_cost (speed, mode);
2739 if (shift_cost (speed, mode, m) < op_cost)
2740 op_cost = shift_cost (speed, mode, m);
2741 new_limit.cost = best_cost.cost - op_cost;
2742 new_limit.latency = best_cost.latency - op_cost;
2743 synth_mult (alg_in, q, &new_limit, mode);
2745 alg_in->cost.cost += op_cost;
2746 alg_in->cost.latency += op_cost;
2747 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2749 best_cost = alg_in->cost;
2750 std::swap (alg_in, best_alg);
2751 best_alg->log[best_alg->ops] = m;
2752 best_alg->op[best_alg->ops] = alg_shift;
2756 if (cache_hit)
2757 goto done;
2760 /* If we have an odd number, add or subtract one. */
2761 if ((t & 1) != 0)
2763 unsigned HOST_WIDE_INT w;
2765 do_alg_addsub_t_m2:
2766 for (w = 1; (w & t) != 0; w <<= 1)
2768 /* If T was -1, then W will be zero after the loop. This is another
2769 case where T ends with ...111. Handling this as (T + 1) followed
2770 by a subtract of 1 produces slightly better code and results in
2771 much faster algorithm selection than treating it like the ...0111
2772 case below. */
2773 if (w == 0
2774 || (w > 2
2775 /* Reject the case where t is 3.
2776 Thus we prefer addition in that case. */
2777 && t != 3))
2779 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2781 op_cost = add_cost (speed, mode);
2782 new_limit.cost = best_cost.cost - op_cost;
2783 new_limit.latency = best_cost.latency - op_cost;
2784 synth_mult (alg_in, t + 1, &new_limit, mode);
2786 alg_in->cost.cost += op_cost;
2787 alg_in->cost.latency += op_cost;
2788 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2790 best_cost = alg_in->cost;
2791 std::swap (alg_in, best_alg);
2792 best_alg->log[best_alg->ops] = 0;
2793 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2796 else
2798 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2800 op_cost = add_cost (speed, mode);
2801 new_limit.cost = best_cost.cost - op_cost;
2802 new_limit.latency = best_cost.latency - op_cost;
2803 synth_mult (alg_in, t - 1, &new_limit, mode);
2805 alg_in->cost.cost += op_cost;
2806 alg_in->cost.latency += op_cost;
2807 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2809 best_cost = alg_in->cost;
2810 std::swap (alg_in, best_alg);
2811 best_alg->log[best_alg->ops] = 0;
2812 best_alg->op[best_alg->ops] = alg_add_t_m2;
2816 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2817 quickly with a - a * n for some appropriate constant n. */
2818 m = exact_log2 (-orig_t + 1);
2819 if (m >= 0 && m < maxm)
2821 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2822 /* If the target has a cheap shift-and-subtract insn, use
2823 it in preference to a shift insn followed by a sub insn.
2824 Assume that the shift-and-sub is "atomic" with a latency
2825 equal to its cost; otherwise assume that on superscalar
2826 hardware the shift may be executed concurrently with the
2827 earlier steps in the algorithm. */
2828 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2830 op_cost = shiftsub1_cost (speed, mode, m);
2831 op_latency = op_cost;
2833 else
2834 op_latency = add_cost (speed, mode);
2836 new_limit.cost = best_cost.cost - op_cost;
2837 new_limit.latency = best_cost.latency - op_latency;
2838 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2839 &new_limit, mode);
2841 alg_in->cost.cost += op_cost;
2842 alg_in->cost.latency += op_latency;
2843 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2845 best_cost = alg_in->cost;
2846 std::swap (alg_in, best_alg);
2847 best_alg->log[best_alg->ops] = m;
2848 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2852 if (cache_hit)
2853 goto done;
2856 /* Look for factors of t of the form
2857 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2858 If we find such a factor, we can multiply by t using an algorithm that
2859 multiplies by q, shift the result by m and add/subtract it to itself.
2861 We search for large factors first and loop down, even if large factors
2862 are less probable than small; if we find a large factor we will find a
2863 good sequence quickly, and therefore be able to prune (by decreasing
2864 COST_LIMIT) the search. */
2866 do_alg_addsub_factor:
2867 for (m = floor_log2 (t - 1); m >= 2; m--)
2869 unsigned HOST_WIDE_INT d;
2871 d = (HOST_WIDE_INT_1U << m) + 1;
2872 if (t % d == 0 && t > d && m < maxm
2873 && (!cache_hit || cache_alg == alg_add_factor))
2875 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2876 if (shiftadd_cost (speed, mode, m) <= op_cost)
2877 op_cost = shiftadd_cost (speed, mode, m);
2879 op_latency = op_cost;
2882 new_limit.cost = best_cost.cost - op_cost;
2883 new_limit.latency = best_cost.latency - op_latency;
2884 synth_mult (alg_in, t / d, &new_limit, mode);
2886 alg_in->cost.cost += op_cost;
2887 alg_in->cost.latency += op_latency;
2888 if (alg_in->cost.latency < op_cost)
2889 alg_in->cost.latency = op_cost;
2890 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2892 best_cost = alg_in->cost;
2893 std::swap (alg_in, best_alg);
2894 best_alg->log[best_alg->ops] = m;
2895 best_alg->op[best_alg->ops] = alg_add_factor;
2897 /* Other factors will have been taken care of in the recursion. */
2898 break;
2901 d = (HOST_WIDE_INT_1U << m) - 1;
2902 if (t % d == 0 && t > d && m < maxm
2903 && (!cache_hit || cache_alg == alg_sub_factor))
2905 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2906 if (shiftsub0_cost (speed, mode, m) <= op_cost)
2907 op_cost = shiftsub0_cost (speed, mode, m);
2909 op_latency = op_cost;
2911 new_limit.cost = best_cost.cost - op_cost;
2912 new_limit.latency = best_cost.latency - op_latency;
2913 synth_mult (alg_in, t / d, &new_limit, mode);
2915 alg_in->cost.cost += op_cost;
2916 alg_in->cost.latency += op_latency;
2917 if (alg_in->cost.latency < op_cost)
2918 alg_in->cost.latency = op_cost;
2919 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2921 best_cost = alg_in->cost;
2922 std::swap (alg_in, best_alg);
2923 best_alg->log[best_alg->ops] = m;
2924 best_alg->op[best_alg->ops] = alg_sub_factor;
2926 break;
2929 if (cache_hit)
2930 goto done;
2932 /* Try shift-and-add (load effective address) instructions,
2933 i.e. do a*3, a*5, a*9. */
2934 if ((t & 1) != 0)
2936 do_alg_add_t2_m:
2937 q = t - 1;
2938 m = ctz_hwi (q);
2939 if (q && m < maxm)
2941 op_cost = shiftadd_cost (speed, mode, m);
2942 new_limit.cost = best_cost.cost - op_cost;
2943 new_limit.latency = best_cost.latency - op_cost;
2944 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2946 alg_in->cost.cost += op_cost;
2947 alg_in->cost.latency += op_cost;
2948 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2950 best_cost = alg_in->cost;
2951 std::swap (alg_in, best_alg);
2952 best_alg->log[best_alg->ops] = m;
2953 best_alg->op[best_alg->ops] = alg_add_t2_m;
2956 if (cache_hit)
2957 goto done;
2959 do_alg_sub_t2_m:
2960 q = t + 1;
2961 m = ctz_hwi (q);
2962 if (q && m < maxm)
2964 op_cost = shiftsub0_cost (speed, mode, m);
2965 new_limit.cost = best_cost.cost - op_cost;
2966 new_limit.latency = best_cost.latency - op_cost;
2967 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2969 alg_in->cost.cost += op_cost;
2970 alg_in->cost.latency += op_cost;
2971 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2973 best_cost = alg_in->cost;
2974 std::swap (alg_in, best_alg);
2975 best_alg->log[best_alg->ops] = m;
2976 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2979 if (cache_hit)
2980 goto done;
2983 done:
2984 /* If best_cost has not decreased, we have not found any algorithm. */
2985 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2987 /* We failed to find an algorithm. Record alg_impossible for
2988 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2989 we are asked to find an algorithm for T within the same or
2990 lower COST_LIMIT, we can immediately return to the
2991 caller. */
2992 entry_ptr->t = t;
2993 entry_ptr->mode = mode;
2994 entry_ptr->speed = speed;
2995 entry_ptr->alg = alg_impossible;
2996 entry_ptr->cost = *cost_limit;
2997 return;
3000 /* Cache the result. */
3001 if (!cache_hit)
3003 entry_ptr->t = t;
3004 entry_ptr->mode = mode;
3005 entry_ptr->speed = speed;
3006 entry_ptr->alg = best_alg->op[best_alg->ops];
3007 entry_ptr->cost.cost = best_cost.cost;
3008 entry_ptr->cost.latency = best_cost.latency;
3011 /* If we are getting too long a sequence for `struct algorithm'
3012 to record, make this search fail. */
3013 if (best_alg->ops == MAX_BITS_PER_WORD)
3014 return;
3016 /* Copy the algorithm from temporary space to the space at alg_out.
3017 We avoid using structure assignment because the majority of
3018 best_alg is normally undefined, and this is a critical function. */
3019 alg_out->ops = best_alg->ops + 1;
3020 alg_out->cost = best_cost;
3021 memcpy (alg_out->op, best_alg->op,
3022 alg_out->ops * sizeof *alg_out->op);
3023 memcpy (alg_out->log, best_alg->log,
3024 alg_out->ops * sizeof *alg_out->log);
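/* Purely illustrative, standalone sketch (plain C; the helper name is
   made up) of the kind of sequence synth_mult looks for.
   45 = 5 * 9 = (2^2 + 1) * (2^3 + 1), so two alg_add_factor steps
   compute x * 45 with two shifts and two adds instead of a multiply
   instruction.  */

static unsigned long
sketch_mult_by_45 (unsigned long x)
{
  unsigned long t = x + (x << 2);	/* t = 5 * x	(alg_add_factor, log = 2)  */
  return t + (t << 3);			/* 45 * x	(alg_add_factor, log = 3)  */
}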
3027 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3028 Try three variations:
3030 - a shift/add sequence based on VAL itself
3031 - a shift/add sequence based on -VAL, followed by a negation
3032 - a shift/add sequence based on VAL - 1, followed by an addition.
3034 Return true if the cheapest of these costs less than MULT_COST,
3035 describing the algorithm in *ALG and final fixup in *VARIANT. */
3037 bool
3038 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3039 struct algorithm *alg, enum mult_variant *variant,
3040 int mult_cost)
3042 struct algorithm alg2;
3043 struct mult_cost limit;
3044 int op_cost;
3045 bool speed = optimize_insn_for_speed_p ();
3047 /* Fail quickly for impossible bounds. */
3048 if (mult_cost < 0)
3049 return false;
3051 /* Ensure that mult_cost provides a reasonable upper bound.
3052 Any constant multiplication can be performed with less
3053 than 2 * bits additions. */
3054 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3055 if (mult_cost > op_cost)
3056 mult_cost = op_cost;
3058 *variant = basic_variant;
3059 limit.cost = mult_cost;
3060 limit.latency = mult_cost;
3061 synth_mult (alg, val, &limit, mode);
3063 /* This works only if the inverted value actually fits in an
3064 `unsigned int' */
3065 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3067 op_cost = neg_cost (speed, mode);
3068 if (MULT_COST_LESS (&alg->cost, mult_cost))
3070 limit.cost = alg->cost.cost - op_cost;
3071 limit.latency = alg->cost.latency - op_cost;
3073 else
3075 limit.cost = mult_cost - op_cost;
3076 limit.latency = mult_cost - op_cost;
3079 synth_mult (&alg2, -val, &limit, mode);
3080 alg2.cost.cost += op_cost;
3081 alg2.cost.latency += op_cost;
3082 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3083 *alg = alg2, *variant = negate_variant;
3086 /* This proves very useful for division-by-constant. */
3087 op_cost = add_cost (speed, mode);
3088 if (MULT_COST_LESS (&alg->cost, mult_cost))
3090 limit.cost = alg->cost.cost - op_cost;
3091 limit.latency = alg->cost.latency - op_cost;
3093 else
3095 limit.cost = mult_cost - op_cost;
3096 limit.latency = mult_cost - op_cost;
3099 synth_mult (&alg2, val - 1, &limit, mode);
3100 alg2.cost.cost += op_cost;
3101 alg2.cost.latency += op_cost;
3102 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3103 *alg = alg2, *variant = add_variant;
3105 return MULT_COST_LESS (&alg->cost, mult_cost);
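/* Purely illustrative, standalone sketch (plain C; the helper name is
   made up) of the second variant tried above: a multiplication by a
   negative constant can be synthesized from the sequence for its
   absolute value followed by a single negation, e.g. x * -7 as
   -((x << 3) - x).  The arithmetic is done on an unsigned type so the
   values wrap modulo the word width, just as synth_mult's unsigned
   computation does.  */

static unsigned long
sketch_mult_by_minus_7 (unsigned long x)
{
  unsigned long t = (x << 3) - x;	/* 7 * x: one shift, one subtract.  */
  return 0 - t;				/* negate_variant fixup: -7 * x.  */
}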
3108 /* A subroutine of expand_mult, used for constant multiplications.
3109 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3110 convenient. Use the shift/add sequence described by ALG and apply
3111 the final fixup specified by VARIANT. */
3113 static rtx
3114 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3115 rtx target, const struct algorithm *alg,
3116 enum mult_variant variant)
3118 unsigned HOST_WIDE_INT val_so_far;
3119 rtx_insn *insn;
3120 rtx accum, tem;
3121 int opno;
3122 machine_mode nmode;
3124 /* Avoid referencing memory over and over and invalid sharing
3125 on SUBREGs. */
3126 op0 = force_reg (mode, op0);
3128 /* ACCUM starts out either as OP0 or as a zero, depending on
3129 the first operation. */
3131 if (alg->op[0] == alg_zero)
3133 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3134 val_so_far = 0;
3136 else if (alg->op[0] == alg_m)
3138 accum = copy_to_mode_reg (mode, op0);
3139 val_so_far = 1;
3141 else
3142 gcc_unreachable ();
3144 for (opno = 1; opno < alg->ops; opno++)
3146 int log = alg->log[opno];
3147 rtx shift_subtarget = optimize ? 0 : accum;
3148 rtx add_target
3149 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3150 && !optimize)
3151 ? target : 0;
3152 rtx accum_target = optimize ? 0 : accum;
3153 rtx accum_inner;
3155 switch (alg->op[opno])
3157 case alg_shift:
3158 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3159 /* REG_EQUAL note will be attached to the following insn. */
3160 emit_move_insn (accum, tem);
3161 val_so_far <<= log;
3162 break;
3164 case alg_add_t_m2:
3165 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3166 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3167 add_target ? add_target : accum_target);
3168 val_so_far += HOST_WIDE_INT_1U << log;
3169 break;
3171 case alg_sub_t_m2:
3172 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3173 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3174 add_target ? add_target : accum_target);
3175 val_so_far -= HOST_WIDE_INT_1U << log;
3176 break;
3178 case alg_add_t2_m:
3179 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3180 log, shift_subtarget, 0);
3181 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3182 add_target ? add_target : accum_target);
3183 val_so_far = (val_so_far << log) + 1;
3184 break;
3186 case alg_sub_t2_m:
3187 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3188 log, shift_subtarget, 0);
3189 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3190 add_target ? add_target : accum_target);
3191 val_so_far = (val_so_far << log) - 1;
3192 break;
3194 case alg_add_factor:
3195 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3196 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3197 add_target ? add_target : accum_target);
3198 val_so_far += val_so_far << log;
3199 break;
3201 case alg_sub_factor:
3202 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3203 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3204 (add_target
3205 ? add_target : (optimize ? 0 : tem)));
3206 val_so_far = (val_so_far << log) - val_so_far;
3207 break;
3209 default:
3210 gcc_unreachable ();
3213 if (SCALAR_INT_MODE_P (mode))
3215 /* Write a REG_EQUAL note on the last insn so that we can cse
3216 multiplication sequences. Note that if ACCUM is a SUBREG,
3217 we've set the inner register and must properly indicate that. */
3218 tem = op0, nmode = mode;
3219 accum_inner = accum;
3220 if (GET_CODE (accum) == SUBREG)
3222 accum_inner = SUBREG_REG (accum);
3223 nmode = GET_MODE (accum_inner);
3224 tem = gen_lowpart (nmode, op0);
3227 insn = get_last_insn ();
3228 set_dst_reg_note (insn, REG_EQUAL,
3229 gen_rtx_MULT (nmode, tem,
3230 gen_int_mode (val_so_far, nmode)),
3231 accum_inner);
3235 if (variant == negate_variant)
3237 val_so_far = -val_so_far;
3238 accum = expand_unop (mode, neg_optab, accum, target, 0);
3240 else if (variant == add_variant)
3242 val_so_far = val_so_far + 1;
3243 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3246 /* Compare only the bits of val and val_so_far that are significant
3247 in the result mode, to avoid sign-/zero-extension confusion. */
3248 nmode = GET_MODE_INNER (mode);
3249 val &= GET_MODE_MASK (nmode);
3250 val_so_far &= GET_MODE_MASK (nmode);
3251 gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3253 return accum;
3256 /* Perform a multiplication and return an rtx for the result.
3257 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3258 TARGET is a suggestion for where to store the result (an rtx).
3260 We check specially for a constant integer as OP1.
3261 If you want this check for OP0 as well, then before calling
3262 you should swap the two operands if OP0 would be constant. */
3264 rtx
3265 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3266 int unsignedp)
3268 enum mult_variant variant;
3269 struct algorithm algorithm;
3270 rtx scalar_op1;
3271 int max_cost;
3272 bool speed = optimize_insn_for_speed_p ();
3273 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3275 if (CONSTANT_P (op0))
3276 std::swap (op0, op1);
3278 /* For vectors, there are several simplifications that can be made if
3279 all elements of the vector constant are identical. */
3280 scalar_op1 = unwrap_const_vec_duplicate (op1);
3282 if (INTEGRAL_MODE_P (mode))
3284 rtx fake_reg;
3285 HOST_WIDE_INT coeff;
3286 bool is_neg;
3287 int mode_bitsize;
3289 if (op1 == CONST0_RTX (mode))
3290 return op1;
3291 if (op1 == CONST1_RTX (mode))
3292 return op0;
3293 if (op1 == CONSTM1_RTX (mode))
3294 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3295 op0, target, 0);
3297 if (do_trapv)
3298 goto skip_synth;
3300 /* If mode is an integer vector mode, check whether the backend supports
3301 a vector lshift (by scalar or vector) at all. If not, we can't use
3302 a synthesized multiply. */
3303 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3304 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3305 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3306 goto skip_synth;
3308 /* These are the operations that are potentially turned into
3309 a sequence of shifts and additions. */
3310 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3312 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3313 less than or equal in size to `unsigned int' this doesn't matter.
3314 If the mode is larger than `unsigned int', then synth_mult works
3315 only if the constant value exactly fits in an `unsigned int' without
3316 any truncation. This means that multiplying by negative values does
3317 not work; results are off by 2^32 on a 32 bit machine. */
3318 if (CONST_INT_P (scalar_op1))
3320 coeff = INTVAL (scalar_op1);
3321 is_neg = coeff < 0;
3323 #if TARGET_SUPPORTS_WIDE_INT
3324 else if (CONST_WIDE_INT_P (scalar_op1))
3325 #else
3326 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3327 #endif
3329 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3330 /* Perfect power of 2 (other than 1, which is handled above). */
3331 if (shift > 0)
3332 return expand_shift (LSHIFT_EXPR, mode, op0,
3333 shift, target, unsignedp);
3334 else
3335 goto skip_synth;
3337 else
3338 goto skip_synth;
3340 /* We used to test optimize here, on the grounds that it's better to
3341 produce a smaller program when -O is not used. But this causes
3342 such a terrible slowdown sometimes that it seems better to always
3343 use synth_mult. */
3345 /* Special case powers of two. */
3346 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3347 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3348 return expand_shift (LSHIFT_EXPR, mode, op0,
3349 floor_log2 (coeff), target, unsignedp);
3351 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3353 /* Attempt to handle multiplication of DImode values by negative
3354 coefficients, by performing the multiplication by a positive
3355 multiplier and then negating the result. */
3356 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3358 /* It's safe to use -coeff even for INT_MIN, as the
3359 result is interpreted as an unsigned coefficient.
3360 Exclude cost of op0 from max_cost to match the cost
3361 calculation of the synth_mult. */
3362 coeff = -(unsigned HOST_WIDE_INT) coeff;
3363 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3364 mode, speed)
3365 - neg_cost (speed, mode));
3366 if (max_cost <= 0)
3367 goto skip_synth;
3369 /* Special case powers of two. */
3370 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3372 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3373 floor_log2 (coeff), target, unsignedp);
3374 return expand_unop (mode, neg_optab, temp, target, 0);
3377 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3378 max_cost))
3380 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3381 &algorithm, variant);
3382 return expand_unop (mode, neg_optab, temp, target, 0);
3384 goto skip_synth;
3387 /* Exclude cost of op0 from max_cost to match the cost
3388 calculation of the synth_mult. */
3389 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3390 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3391 return expand_mult_const (mode, op0, coeff, target,
3392 &algorithm, variant);
3394 skip_synth:
3396 /* Expand x*2.0 as x+x. */
3397 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3398 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3400 op0 = force_reg (GET_MODE (op0), op0);
3401 return expand_binop (mode, add_optab, op0, op0,
3402 target, unsignedp, OPTAB_LIB_WIDEN);
3405 /* This used to use umul_optab if unsigned, but for non-widening multiply
3406 there is no difference between signed and unsigned. */
3407 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3408 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3409 gcc_assert (op0);
3410 return op0;
3413 /* Return a cost estimate for multiplying a register by the given
3414 COEFFicient in the given MODE and SPEED. */
3416 int
3417 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3419 int max_cost;
3420 struct algorithm algorithm;
3421 enum mult_variant variant;
3423 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3424 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3425 mode, speed);
3426 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3427 return algorithm.cost.cost;
3428 else
3429 return max_cost;
3432 /* Perform a widening multiplication and return an rtx for the result.
3433 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3434 TARGET is a suggestion for where to store the result (an rtx).
3435 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3436 or smul_widen_optab.
3438 We check specially for a constant integer as OP1, comparing the
3439 cost of a widening multiply against the cost of a sequence of shifts
3440 and adds. */
3442 rtx
3443 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3444 int unsignedp, optab this_optab)
3446 bool speed = optimize_insn_for_speed_p ();
3447 rtx cop1;
3449 if (CONST_INT_P (op1)
3450 && GET_MODE (op0) != VOIDmode
3451 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3452 this_optab == umul_widen_optab))
3453 && CONST_INT_P (cop1)
3454 && (INTVAL (cop1) >= 0
3455 || HWI_COMPUTABLE_MODE_P (mode)))
3457 HOST_WIDE_INT coeff = INTVAL (cop1);
3458 int max_cost;
3459 enum mult_variant variant;
3460 struct algorithm algorithm;
3462 if (coeff == 0)
3463 return CONST0_RTX (mode);
3465 /* Special case powers of two. */
3466 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3468 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3469 return expand_shift (LSHIFT_EXPR, mode, op0,
3470 floor_log2 (coeff), target, unsignedp);
3473 /* Exclude cost of op0 from max_cost to match the cost
3474 calculation of the synth_mult. */
3475 max_cost = mul_widen_cost (speed, mode);
3476 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3477 max_cost))
3479 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3480 return expand_mult_const (mode, op0, coeff, target,
3481 &algorithm, variant);
3484 return expand_binop (mode, this_optab, op0, op1, target,
3485 unsignedp, OPTAB_LIB_WIDEN);
3488 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3489 replace division by D, and put the least significant N bits of the result
3490 in *MULTIPLIER_PTR and return the most significant bit.
3492 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3493 needed precision is in PRECISION (should be <= N).
3495 PRECISION should be as small as possible so this function can choose
3496 the multiplier more freely.
3498 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
3499 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3501 Using this function, x/D will be equal to (x * m) >> (N + *POST_SHIFT_PTR),
3502 where m is the full N + 1 bit multiplier. */
3504 unsigned HOST_WIDE_INT
3505 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3506 unsigned HOST_WIDE_INT *multiplier_ptr,
3507 int *post_shift_ptr, int *lgup_ptr)
3509 int lgup, post_shift;
3510 int pow, pow2;
3512 /* lgup = ceil(log2(divisor)); */
3513 lgup = ceil_log2 (d);
3515 gcc_assert (lgup <= n);
3517 pow = n + lgup;
3518 pow2 = n + lgup - precision;
3520 /* mlow = 2^(N + lgup)/d */
3521 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3522 wide_int mlow = wi::udiv_trunc (val, d);
3524 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3525 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3526 wide_int mhigh = wi::udiv_trunc (val, d);
3528 /* If precision == N, then mlow, mhigh exceed 2^N
3529 (but they do not exceed 2^(N+1)). */
3531 /* Reduce to lowest terms. */
3532 for (post_shift = lgup; post_shift > 0; post_shift--)
3534 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3535 HOST_BITS_PER_WIDE_INT);
3536 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3537 HOST_BITS_PER_WIDE_INT);
3538 if (ml_lo >= mh_lo)
3539 break;
3541 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3542 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3545 *post_shift_ptr = post_shift;
3546 *lgup_ptr = lgup;
3547 if (n < HOST_BITS_PER_WIDE_INT)
3549 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3550 *multiplier_ptr = mhigh.to_uhwi () & mask;
3551 return mhigh.to_uhwi () >= mask;
3553 else
3555 *multiplier_ptr = mhigh.to_uhwi ();
3556 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
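/* Worked example (illustrative): for D = 3 with N = PRECISION = 32 we get
   lgup = 2, mlow = 2^34 / 3 = 0x155555555 and mhigh = (2^34 + 4) / 3
   = 0x155555556; one reduction step leaves mhigh = 0xAAAAAAAB with
   POST_SHIFT = 1 and a clear carry bit, so an unsigned 32-bit division by
   3 needs only a high-part multiply and one shift.  The helper below
   (its name is made up, and a 64-bit HOST_WIDE_INT is assumed) spells out
   that resulting sequence on the host.  */

static inline unsigned int
example_udiv3_by_high_multiply (unsigned int x)
{
  unsigned HOST_WIDE_INT high
    = ((unsigned HOST_WIDE_INT) x * 0xAAAAAAAB) >> 32;
  return (unsigned int) (high >> 1);	/* == x / 3 for every 32-bit x.  */
}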
3560 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3561 congruent to 1 (mod 2**N). */
3563 static unsigned HOST_WIDE_INT
3564 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3566 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3568 /* The algorithm notes that the choice y = x satisfies
3569 x*y == 1 mod 2^3, since x is assumed odd.
3570 Each iteration doubles the number of bits of significance in y. */
3572 unsigned HOST_WIDE_INT mask;
3573 unsigned HOST_WIDE_INT y = x;
3574 int nbit = 3;
3576 mask = (n == HOST_BITS_PER_WIDE_INT
3577 ? HOST_WIDE_INT_M1U
3578 : (HOST_WIDE_INT_1U << n) - 1);
3580 while (nbit < n)
3582 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3583 nbit *= 2;
3585 return y;
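/* For example (illustrative): invert_mod2n (3, 32) == 0xAAAAAAAB, because
   3 * 0xAAAAAAAB == 2^33 + 1 == 1 (mod 2^32); each Newton step above
   doubles the number of correct low-order bits.  The EXACT_DIV_EXPR case
   of expand_divmod below uses such inverses so that an exact division by
   an odd constant becomes a single low-part multiplication, as in this
   made-up helper, which is valid only when X is a multiple of 3.  */

static inline unsigned int
example_exact_udiv3 (unsigned int x)
{
  return x * 0xAAAAAAAB;	/* == x / 3 whenever x % 3 == 0.  */
}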
3588 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
3589 in the wrong signedness flavor. ADJ_OPERAND is already the high half of the
3590 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3591 to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
3592 become signed.
3594 The result is put in TARGET if that is convenient.
3596 MODE is the mode of operation. */
3599 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3600 rtx op1, rtx target, int unsignedp)
3602 rtx tem;
3603 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3605 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3606 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3607 tem = expand_and (mode, tem, op1, NULL_RTX);
3608 adj_operand
3609 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3610 adj_operand);
3612 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3613 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3614 tem = expand_and (mode, tem, op0, NULL_RTX);
3615 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3616 target);
3618 return target;
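/* Illustratively, for N-bit operands X and Y with sign bits sx and sy
   (writing umulh/smulh for the unsigned and signed high-part products),
     umulh (X, Y) == smulh (X, Y) + (sx ? Y : 0) + (sy ? X : 0)  (mod 2^N),
   and the same identity read backwards turns an unsigned high part into a
   signed one.  The two AND-of-arithmetic-shift terms above compute exactly
   those conditional addends without branches.  */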
3621 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3623 static rtx
3624 extract_high_half (machine_mode mode, rtx op)
3626 machine_mode wider_mode;
3628 if (mode == word_mode)
3629 return gen_highpart (mode, op);
3631 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3633 wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3634 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3635 GET_MODE_BITSIZE (mode), 0, 1);
3636 return convert_modes (mode, wider_mode, op, 0);
3639 /* Like expmed_mult_highpart, but only consider using a multiplication
3640 optab. OP1 is an rtx for the constant operand. */
3642 static rtx
3643 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3644 rtx target, int unsignedp, int max_cost)
3646 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3647 machine_mode wider_mode;
3648 optab moptab;
3649 rtx tem;
3650 int size;
3651 bool speed = optimize_insn_for_speed_p ();
3653 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3655 wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3656 size = GET_MODE_BITSIZE (mode);
3658 /* Firstly, try using a multiplication insn that only generates the needed
3659 high part of the product, and in the sign flavor of unsignedp. */
3660 if (mul_highpart_cost (speed, mode) < max_cost)
3662 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3663 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3664 unsignedp, OPTAB_DIRECT);
3665 if (tem)
3666 return tem;
3669 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3670 Need to adjust the result after the multiplication. */
3671 if (size - 1 < BITS_PER_WORD
3672 && (mul_highpart_cost (speed, mode)
3673 + 2 * shift_cost (speed, mode, size-1)
3674 + 4 * add_cost (speed, mode) < max_cost))
3676 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3677 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3678 unsignedp, OPTAB_DIRECT);
3679 if (tem)
3680 /* We used the wrong signedness. Adjust the result. */
3681 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3682 tem, unsignedp);
3685 /* Try widening multiplication. */
3686 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3687 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3688 && mul_widen_cost (speed, wider_mode) < max_cost)
3690 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3691 unsignedp, OPTAB_WIDEN);
3692 if (tem)
3693 return extract_high_half (mode, tem);
3696 /* Try widening the mode and perform a non-widening multiplication. */
3697 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3698 && size - 1 < BITS_PER_WORD
3699 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3700 < max_cost))
3702 rtx_insn *insns;
3703 rtx wop0, wop1;
3705 /* We need to widen the operands, for example to ensure the
3706 constant multiplier is correctly sign or zero extended.
3707 Use a sequence to clean up any instructions emitted by
3708 the conversions if things don't work out. */
3709 start_sequence ();
3710 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3711 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3712 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3713 unsignedp, OPTAB_WIDEN);
3714 insns = get_insns ();
3715 end_sequence ();
3717 if (tem)
3719 emit_insn (insns);
3720 return extract_high_half (mode, tem);
3724 /* Try widening multiplication of opposite signedness, and adjust. */
3725 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3726 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3727 && size - 1 < BITS_PER_WORD
3728 && (mul_widen_cost (speed, wider_mode)
3729 + 2 * shift_cost (speed, mode, size-1)
3730 + 4 * add_cost (speed, mode) < max_cost))
3732 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3733 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3734 if (tem != 0)
3736 tem = extract_high_half (mode, tem);
3737 /* We used the wrong signedness. Adjust the result. */
3738 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3739 target, unsignedp);
3743 return 0;
3746 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3747 putting the high half of the result in TARGET if that is convenient,
3748 and return where the result is. If the operation cannot be performed,
3749 0 is returned.
3751 MODE is the mode of operation and result.
3753 UNSIGNEDP nonzero means unsigned multiply.
3755 MAX_COST is the total allowed cost for the expanded RTL. */
3757 static rtx
3758 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3759 rtx target, int unsignedp, int max_cost)
3761 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3762 unsigned HOST_WIDE_INT cnst1;
3763 int extra_cost;
3764 bool sign_adjust = false;
3765 enum mult_variant variant;
3766 struct algorithm alg;
3767 rtx tem;
3768 bool speed = optimize_insn_for_speed_p ();
3770 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3771 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3772 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3774 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3776 /* We can't optimize modes wider than BITS_PER_WORD.
3777 ??? We might be able to perform double-word arithmetic if
3778 mode == word_mode, however all the cost calculations in
3779 synth_mult etc. assume single-word operations. */
3780 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3781 return expmed_mult_highpart_optab (mode, op0, op1, target,
3782 unsignedp, max_cost);
3784 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3786 /* Check whether we try to multiply by a negative constant. */
3787 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3789 sign_adjust = true;
3790 extra_cost += add_cost (speed, mode);
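/* The wider shift/add multiplication below uses CNST1, the unsigned
   interpretation of the constant; for a negative signed constant c we have
   CNST1 == c + 2^BITSIZE, so the extracted high half comes out too large
   by exactly OP0.  The MINUS adjustment at the end of the function undoes
   that, which is why one extra addition is charged here.  */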
3793 /* See whether shift/add multiplication is cheap enough. */
3794 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3795 max_cost - extra_cost))
3797 /* See whether the specialized multiplication optabs are
3798 cheaper than the shift/add version. */
3799 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3800 alg.cost.cost + extra_cost);
3801 if (tem)
3802 return tem;
3804 tem = convert_to_mode (wider_mode, op0, unsignedp);
3805 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3806 tem = extract_high_half (mode, tem);
3808 /* Adjust result for signedness. */
3809 if (sign_adjust)
3810 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3812 return tem;
3814 return expmed_mult_highpart_optab (mode, op0, op1, target,
3815 unsignedp, max_cost);
3819 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3821 static rtx
3822 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3824 rtx result, temp, shift;
3825 rtx_code_label *label;
3826 int logd;
3827 int prec = GET_MODE_PRECISION (mode);
3829 logd = floor_log2 (d);
3830 result = gen_reg_rtx (mode);
3832 /* Avoid conditional branches when they're expensive. */
3833 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3834 && optimize_insn_for_speed_p ())
3836 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3837 mode, 0, -1);
3838 if (signmask)
3840 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3841 signmask = force_reg (mode, signmask);
3842 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3844 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3845 which instruction sequence to use. If logical right shifts
3846 are expensive then use 2 XORs, 2 SUBs and an AND; otherwise
3847 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3849 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3850 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3851 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3852 > COSTS_N_INSNS (2)))
3854 temp = expand_binop (mode, xor_optab, op0, signmask,
3855 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3856 temp = expand_binop (mode, sub_optab, temp, signmask,
3857 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3858 temp = expand_binop (mode, and_optab, temp,
3859 gen_int_mode (masklow, mode),
3860 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3861 temp = expand_binop (mode, xor_optab, temp, signmask,
3862 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3863 temp = expand_binop (mode, sub_optab, temp, signmask,
3864 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3866 else
3868 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3869 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3870 signmask = force_reg (mode, signmask);
3872 temp = expand_binop (mode, add_optab, op0, signmask,
3873 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3874 temp = expand_binop (mode, and_optab, temp,
3875 gen_int_mode (masklow, mode),
3876 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3877 temp = expand_binop (mode, sub_optab, temp, signmask,
3878 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3880 return temp;
3884 /* Mask contains the mode's signbit and the significant bits of the
3885 modulus. By including the signbit in the operation, many targets
3886 can avoid an explicit compare operation in the following comparison
3887 against zero. */
3888 wide_int mask = wi::mask (logd, false, prec);
3889 mask = wi::set_bit (mask, prec - 1);
3891 temp = expand_binop (mode, and_optab, op0,
3892 immed_wide_int_const (mask, mode),
3893 result, 1, OPTAB_LIB_WIDEN);
3894 if (temp != result)
3895 emit_move_insn (result, temp);
3897 label = gen_label_rtx ();
3898 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3900 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3901 0, OPTAB_LIB_WIDEN);
3903 mask = wi::mask (logd, true, prec);
3904 temp = expand_binop (mode, ior_optab, temp,
3905 immed_wide_int_const (mask, mode),
3906 result, 1, OPTAB_LIB_WIDEN);
3907 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3908 0, OPTAB_LIB_WIDEN);
3909 if (temp != result)
3910 emit_move_insn (result, temp);
3911 emit_label (label);
3912 return result;
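/* Branch-free sketch of the sequence above for D == 4 and a 32-bit mode.
   Illustrative only: the helper name is made up and the code assumes the
   usual arithmetic behaviour of >> on negative host integers.  */

static inline int
example_smod4_branchless (int x)
{
  /* S is 3 for negative X and 0 otherwise, i.e. the logically shifted
     sign mask used by the LSHIFTRT variant.  */
  unsigned int s = (unsigned int) (x >> 31) >> (32 - 2);
  /* ((X + S) & 3) - S == X % 4 with truncating (C) semantics.  */
  return (int) (((unsigned int) x + s) & 3) - (int) s;
}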
3915 /* Expand signed division of OP0 by a power of two D in mode MODE.
3916 This routine is only called for positive values of D. */
3918 static rtx
3919 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3921 rtx temp;
3922 rtx_code_label *label;
3923 int logd;
3925 logd = floor_log2 (d);
3927 if (d == 2
3928 && BRANCH_COST (optimize_insn_for_speed_p (),
3929 false) >= 1)
3931 temp = gen_reg_rtx (mode);
3932 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3933 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3934 0, OPTAB_LIB_WIDEN);
3935 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3938 if (HAVE_conditional_move
3939 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3941 rtx temp2;
3943 start_sequence ();
3944 temp2 = copy_to_mode_reg (mode, op0);
3945 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3946 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3947 temp = force_reg (mode, temp);
3949 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3950 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3951 mode, temp, temp2, mode, 0);
3952 if (temp2)
3954 rtx_insn *seq = get_insns ();
3955 end_sequence ();
3956 emit_insn (seq);
3957 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3959 end_sequence ();
3962 if (BRANCH_COST (optimize_insn_for_speed_p (),
3963 false) >= 2)
3965 int ushift = GET_MODE_BITSIZE (mode) - logd;
3967 temp = gen_reg_rtx (mode);
3968 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3969 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3970 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3971 > COSTS_N_INSNS (1))
3972 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3973 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3974 else
3975 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3976 ushift, NULL_RTX, 1);
3977 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3978 0, OPTAB_LIB_WIDEN);
3979 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3982 label = gen_label_rtx ();
3983 temp = copy_to_mode_reg (mode, op0);
3984 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3985 expand_inc (temp, gen_int_mode (d - 1, mode));
3986 emit_label (label);
3987 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
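/* Branch-free sketch of the store-flag variant above for D == 4 and a
   32-bit mode.  Illustrative only: the helper name is made up and an
   arithmetic >> of negative host integers is assumed.  */

static inline int
example_sdiv4_branchless (int x)
{
  /* Bias negative values by D - 1 == 3 so that the final arithmetic shift
     truncates toward zero rather than toward minus infinity.  */
  int bias = (x >> 31) & 3;
  return (int) ((unsigned int) x + (unsigned int) bias) >> 2;
}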
3990 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3991 if that is convenient, and returning where the result is.
3992 You may request either the quotient or the remainder as the result;
3993 specify REM_FLAG nonzero to get the remainder.
3995 CODE is the expression code for which kind of division this is;
3996 it controls how rounding is done. MODE is the machine mode to use.
3997 UNSIGNEDP nonzero means do unsigned division. */
3999 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4000 and then correct it by or'ing in missing high bits
4001 if result of ANDI is nonzero.
4002 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4003 This could optimize to a bfexts instruction.
4004 But C doesn't use these operations, so their optimizations are
4005 left for later. */
4006 /* ??? For modulo, we don't actually need the highpart of the first product,
4007 the low part will do nicely. And for small divisors, the second multiply
4008 can also be a low-part only multiply or even be completely left out.
4009 E.g. to calculate the remainder of a division by 3 with a 32 bit
4010 multiply, multiply with 0x55555556 and extract the upper two bits;
4011 the result is exact for inputs up to 0x1fffffff.
4012 The input range can be reduced by using cross-sum rules.
4013 For odd divisors >= 3, the following table gives right shift counts
4014 so that if a number is shifted by an integer multiple of the given
4015 amount, the remainder stays the same:
4016 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4017 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4018 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4019 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4020 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4022 Cross-sum rules for even numbers can be derived by leaving as many bits
4023 to the right alone as the divisor has zeros to the right.
4024 E.g. if x is an unsigned 32 bit number:
4025 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4029 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4030 rtx op0, rtx op1, rtx target, int unsignedp)
4032 machine_mode compute_mode;
4033 rtx tquotient;
4034 rtx quotient = 0, remainder = 0;
4035 rtx_insn *last;
4036 int size;
4037 rtx_insn *insn;
4038 optab optab1, optab2;
4039 int op1_is_constant, op1_is_pow2 = 0;
4040 int max_cost, extra_cost;
4041 static HOST_WIDE_INT last_div_const = 0;
4042 bool speed = optimize_insn_for_speed_p ();
4044 op1_is_constant = CONST_INT_P (op1);
4045 if (op1_is_constant)
4047 wide_int ext_op1 = rtx_mode_t (op1, mode);
4048 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4049 || (! unsignedp
4050 && wi::popcount (wi::neg (ext_op1)) == 1));
4054 /* This is the structure of expand_divmod:
4056 First comes code to fix up the operands so we can perform the operations
4057 correctly and efficiently.
4059 Second comes a switch statement with code specific for each rounding mode.
4060 For some special operands this code emits all RTL for the desired
4061 operation, for other cases, it generates only a quotient and stores it in
4062 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4063 to indicate that it has not done anything.
4065 Last comes code that finishes the operation. If QUOTIENT is set and
4066 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4067 QUOTIENT is not set, it is computed using trunc rounding.
4069 We try to generate special code for division and remainder when OP1 is a
4070 constant. If |OP1| = 2**n we can use shifts and some other fast
4071 operations. For other values of OP1, we compute a carefully selected
4072 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4073 by m.
4075 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4076 half of the product. Different strategies for generating the product are
4077 implemented in expmed_mult_highpart.
4079 If what we actually want is the remainder, we generate that by another
4080 by-constant multiplication and a subtraction. */
4082 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4083 code below will malfunction if we are, so check here and handle
4084 the special case if so. */
4085 if (op1 == const1_rtx)
4086 return rem_flag ? const0_rtx : op0;
4088 /* When dividing by -1, we could get an overflow.
4089 negv_optab can handle overflows. */
4090 if (! unsignedp && op1 == constm1_rtx)
4092 if (rem_flag)
4093 return const0_rtx;
4094 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4095 ? negv_optab : neg_optab, op0, target, 0);
4098 if (target
4099 /* Don't use the function value register as a target
4100 since we have to read it as well as write it,
4101 and function-inlining gets confused by this. */
4102 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4103 /* Don't clobber an operand while doing a multi-step calculation. */
4104 || ((rem_flag || op1_is_constant)
4105 && (reg_mentioned_p (target, op0)
4106 || (MEM_P (op0) && MEM_P (target))))
4107 || reg_mentioned_p (target, op1)
4108 || (MEM_P (op1) && MEM_P (target))))
4109 target = 0;
4111 /* Get the mode in which to perform this computation. Normally it will
4112 be MODE, but sometimes we can't do the desired operation in MODE.
4113 If so, pick a wider mode in which we can do the operation. Convert
4114 to that mode at the start to avoid repeated conversions.
4116 First see what operations we need. These depend on the expression
4117 we are evaluating. (We assume that divxx3 insns exist under the
4118 same conditions as modxx3 insns do, and that these insns don't normally
4119 fail. If these assumptions are not correct, we may generate less
4120 efficient code in some cases.)
4122 Then see if we find a mode in which we can open-code that operation
4123 (either a division, modulus, or shift). Finally, check for the smallest
4124 mode for which we can do the operation with a library call. */
4126 /* We might want to refine this now that we have division-by-constant
4127 optimization. Since expmed_mult_highpart tries so many variants, it is
4128 not straightforward to generalize this. Maybe we should make an array
4129 of possible modes in init_expmed? Save this for GCC 2.7. */
4131 optab1 = (op1_is_pow2
4132 ? (unsignedp ? lshr_optab : ashr_optab)
4133 : (unsignedp ? udiv_optab : sdiv_optab));
4134 optab2 = (op1_is_pow2 ? optab1
4135 : (unsignedp ? udivmod_optab : sdivmod_optab));
4137 FOR_EACH_MODE_FROM (compute_mode, mode)
4138 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4139 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4140 break;
4142 if (compute_mode == VOIDmode)
4143 FOR_EACH_MODE_FROM (compute_mode, mode)
4144 if (optab_libfunc (optab1, compute_mode)
4145 || optab_libfunc (optab2, compute_mode))
4146 break;
4148 /* If we still couldn't find a mode, use MODE, but expand_binop will
4149 probably die. */
4150 if (compute_mode == VOIDmode)
4151 compute_mode = mode;
4153 if (target && GET_MODE (target) == compute_mode)
4154 tquotient = target;
4155 else
4156 tquotient = gen_reg_rtx (compute_mode);
4158 size = GET_MODE_BITSIZE (compute_mode);
4159 #if 0
4160 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4161 (mode), and thereby get better code when OP1 is a constant. Do that
4162 later. It will require going over all usages of SIZE below. */
4163 size = GET_MODE_BITSIZE (mode);
4164 #endif
4166 /* Only deduct something for a REM if the last divide done was
4167 for a different constant. Then set the constant of the last
4168 divide. */
4169 max_cost = (unsignedp
4170 ? udiv_cost (speed, compute_mode)
4171 : sdiv_cost (speed, compute_mode));
4172 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4173 && INTVAL (op1) == last_div_const))
4174 max_cost -= (mul_cost (speed, compute_mode)
4175 + add_cost (speed, compute_mode));
4177 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4179 /* Now convert to the best mode to use. */
4180 if (compute_mode != mode)
4182 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4183 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4185 /* convert_modes may have placed op1 into a register, so we
4186 must recompute the following. */
4187 op1_is_constant = CONST_INT_P (op1);
4188 if (op1_is_constant)
4190 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4191 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4192 || (! unsignedp
4193 && wi::popcount (wi::neg (ext_op1)) == 1));
4195 else
4196 op1_is_pow2 = 0;
4199 /* If one of the operands is a volatile MEM, copy it into a register. */
4201 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4202 op0 = force_reg (compute_mode, op0);
4203 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4204 op1 = force_reg (compute_mode, op1);
4206 /* If we need the remainder or if OP1 is constant, we need to
4207 put OP0 in a register in case it has any queued subexpressions. */
4208 if (rem_flag || op1_is_constant)
4209 op0 = force_reg (compute_mode, op0);
4211 last = get_last_insn ();
4213 /* Promote floor rounding to trunc rounding for unsigned operations. */
4214 if (unsignedp)
4216 if (code == FLOOR_DIV_EXPR)
4217 code = TRUNC_DIV_EXPR;
4218 if (code == FLOOR_MOD_EXPR)
4219 code = TRUNC_MOD_EXPR;
4220 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4221 code = TRUNC_DIV_EXPR;
4224 if (op1 != const0_rtx)
4225 switch (code)
4227 case TRUNC_MOD_EXPR:
4228 case TRUNC_DIV_EXPR:
4229 if (op1_is_constant)
4231 if (unsignedp)
4233 unsigned HOST_WIDE_INT mh, ml;
4234 int pre_shift, post_shift;
4235 int dummy;
4236 wide_int wd = rtx_mode_t (op1, compute_mode);
4237 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4239 if (wi::popcount (wd) == 1)
4241 pre_shift = floor_log2 (d);
4242 if (rem_flag)
4244 unsigned HOST_WIDE_INT mask
4245 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4246 remainder
4247 = expand_binop (compute_mode, and_optab, op0,
4248 gen_int_mode (mask, compute_mode),
4249 remainder, 1,
4250 OPTAB_LIB_WIDEN);
4251 if (remainder)
4252 return gen_lowpart (mode, remainder);
4254 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4255 pre_shift, tquotient, 1);
4257 else if (size <= HOST_BITS_PER_WIDE_INT)
4259 if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4261 /* Most significant bit of divisor is set; emit an scc
4262 insn. */
4263 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4264 compute_mode, 1, 1);
4266 else
4268 /* Find a suitable multiplier and right shift count
4269 instead of multiplying with D. */
4271 mh = choose_multiplier (d, size, size,
4272 &ml, &post_shift, &dummy);
4274 /* If the suggested multiplier is more than SIZE bits,
4275 we can do better for even divisors, using an
4276 initial right shift. */
4277 if (mh != 0 && (d & 1) == 0)
4279 pre_shift = ctz_or_zero (d);
4280 mh = choose_multiplier (d >> pre_shift, size,
4281 size - pre_shift,
4282 &ml, &post_shift, &dummy);
4283 gcc_assert (!mh);
4285 else
4286 pre_shift = 0;
4288 if (mh != 0)
4290 rtx t1, t2, t3, t4;
4292 if (post_shift - 1 >= BITS_PER_WORD)
4293 goto fail1;
4295 extra_cost
4296 = (shift_cost (speed, compute_mode, post_shift - 1)
4297 + shift_cost (speed, compute_mode, 1)
4298 + 2 * add_cost (speed, compute_mode));
4299 t1 = expmed_mult_highpart
4300 (compute_mode, op0,
4301 gen_int_mode (ml, compute_mode),
4302 NULL_RTX, 1, max_cost - extra_cost);
4303 if (t1 == 0)
4304 goto fail1;
4305 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4306 op0, t1),
4307 NULL_RTX);
4308 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4309 t2, 1, NULL_RTX, 1);
4310 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4311 t1, t3),
4312 NULL_RTX);
4313 quotient = expand_shift
4314 (RSHIFT_EXPR, compute_mode, t4,
4315 post_shift - 1, tquotient, 1);
4317 else
4319 rtx t1, t2;
4321 if (pre_shift >= BITS_PER_WORD
4322 || post_shift >= BITS_PER_WORD)
4323 goto fail1;
4325 t1 = expand_shift
4326 (RSHIFT_EXPR, compute_mode, op0,
4327 pre_shift, NULL_RTX, 1);
4328 extra_cost
4329 = (shift_cost (speed, compute_mode, pre_shift)
4330 + shift_cost (speed, compute_mode, post_shift));
4331 t2 = expmed_mult_highpart
4332 (compute_mode, t1,
4333 gen_int_mode (ml, compute_mode),
4334 NULL_RTX, 1, max_cost - extra_cost);
4335 if (t2 == 0)
4336 goto fail1;
4337 quotient = expand_shift
4338 (RSHIFT_EXPR, compute_mode, t2,
4339 post_shift, tquotient, 1);
4343 else /* Too wide mode to use tricky code */
4344 break;
4346 insn = get_last_insn ();
4347 if (insn != last)
4348 set_dst_reg_note (insn, REG_EQUAL,
4349 gen_rtx_UDIV (compute_mode, op0, op1),
4350 quotient);
4352 else /* TRUNC_DIV, signed */
4354 unsigned HOST_WIDE_INT ml;
4355 int lgup, post_shift;
4356 rtx mlr;
4357 HOST_WIDE_INT d = INTVAL (op1);
4358 unsigned HOST_WIDE_INT abs_d;
4360 /* Since d might be INT_MIN, we have to cast to
4361 unsigned HOST_WIDE_INT before negating to avoid
4362 undefined signed overflow. */
4363 abs_d = (d >= 0
4364 ? (unsigned HOST_WIDE_INT) d
4365 : - (unsigned HOST_WIDE_INT) d);
4367 /* n rem d = n rem -d */
4368 if (rem_flag && d < 0)
4370 d = abs_d;
4371 op1 = gen_int_mode (abs_d, compute_mode);
4374 if (d == 1)
4375 quotient = op0;
4376 else if (d == -1)
4377 quotient = expand_unop (compute_mode, neg_optab, op0,
4378 tquotient, 0);
4379 else if (size <= HOST_BITS_PER_WIDE_INT
4380 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4382 /* This case is not handled correctly below. */
4383 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4384 compute_mode, 1, 1);
4385 if (quotient == 0)
4386 goto fail1;
4388 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4389 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4390 && (rem_flag
4391 ? smod_pow2_cheap (speed, compute_mode)
4392 : sdiv_pow2_cheap (speed, compute_mode))
4393 /* We assume that the cheap metric is true if the
4394 optab has an expander for this mode. */
4395 && ((optab_handler ((rem_flag ? smod_optab
4396 : sdiv_optab),
4397 compute_mode)
4398 != CODE_FOR_nothing)
4399 || (optab_handler (sdivmod_optab,
4400 compute_mode)
4401 != CODE_FOR_nothing)))
4403 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)
4404 && (size <= HOST_BITS_PER_WIDE_INT
4405 || abs_d != (unsigned HOST_WIDE_INT) d))
4407 if (rem_flag)
4409 remainder = expand_smod_pow2 (compute_mode, op0, d);
4410 if (remainder)
4411 return gen_lowpart (mode, remainder);
4414 if (sdiv_pow2_cheap (speed, compute_mode)
4415 && ((optab_handler (sdiv_optab, compute_mode)
4416 != CODE_FOR_nothing)
4417 || (optab_handler (sdivmod_optab, compute_mode)
4418 != CODE_FOR_nothing)))
4419 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4420 compute_mode, op0,
4421 gen_int_mode (abs_d,
4422 compute_mode),
4423 NULL_RTX, 0);
4424 else
4425 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4427 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4428 negate the quotient. */
4429 if (d < 0)
4431 insn = get_last_insn ();
4432 if (insn != last
4433 && abs_d < (HOST_WIDE_INT_1U
4434 << (HOST_BITS_PER_WIDE_INT - 1)))
4435 set_dst_reg_note (insn, REG_EQUAL,
4436 gen_rtx_DIV (compute_mode, op0,
4437 gen_int_mode
4438 (abs_d,
4439 compute_mode)),
4440 quotient);
4442 quotient = expand_unop (compute_mode, neg_optab,
4443 quotient, quotient, 0);
4446 else if (size <= HOST_BITS_PER_WIDE_INT)
4448 choose_multiplier (abs_d, size, size - 1,
4449 &ml, &post_shift, &lgup);
4450 if (ml < HOST_WIDE_INT_1U << (size - 1))
4452 rtx t1, t2, t3;
4454 if (post_shift >= BITS_PER_WORD
4455 || size - 1 >= BITS_PER_WORD)
4456 goto fail1;
4458 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4459 + shift_cost (speed, compute_mode, size - 1)
4460 + add_cost (speed, compute_mode));
4461 t1 = expmed_mult_highpart
4462 (compute_mode, op0, gen_int_mode (ml, compute_mode),
4463 NULL_RTX, 0, max_cost - extra_cost);
4464 if (t1 == 0)
4465 goto fail1;
4466 t2 = expand_shift
4467 (RSHIFT_EXPR, compute_mode, t1,
4468 post_shift, NULL_RTX, 0);
4469 t3 = expand_shift
4470 (RSHIFT_EXPR, compute_mode, op0,
4471 size - 1, NULL_RTX, 0);
4472 if (d < 0)
4473 quotient
4474 = force_operand (gen_rtx_MINUS (compute_mode,
4475 t3, t2),
4476 tquotient);
4477 else
4478 quotient
4479 = force_operand (gen_rtx_MINUS (compute_mode,
4480 t2, t3),
4481 tquotient);
4483 else
4485 rtx t1, t2, t3, t4;
4487 if (post_shift >= BITS_PER_WORD
4488 || size - 1 >= BITS_PER_WORD)
4489 goto fail1;
4491 ml |= HOST_WIDE_INT_M1U << (size - 1);
4492 mlr = gen_int_mode (ml, compute_mode);
4493 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4494 + shift_cost (speed, compute_mode, size - 1)
4495 + 2 * add_cost (speed, compute_mode));
4496 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4497 NULL_RTX, 0,
4498 max_cost - extra_cost);
4499 if (t1 == 0)
4500 goto fail1;
4501 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4502 t1, op0),
4503 NULL_RTX);
4504 t3 = expand_shift
4505 (RSHIFT_EXPR, compute_mode, t2,
4506 post_shift, NULL_RTX, 0);
4507 t4 = expand_shift
4508 (RSHIFT_EXPR, compute_mode, op0,
4509 size - 1, NULL_RTX, 0);
4510 if (d < 0)
4511 quotient
4512 = force_operand (gen_rtx_MINUS (compute_mode,
4513 t4, t3),
4514 tquotient);
4515 else
4516 quotient
4517 = force_operand (gen_rtx_MINUS (compute_mode,
4518 t3, t4),
4519 tquotient);
4522 else /* Too wide mode to use tricky code */
4523 break;
4525 insn = get_last_insn ();
4526 if (insn != last)
4527 set_dst_reg_note (insn, REG_EQUAL,
4528 gen_rtx_DIV (compute_mode, op0, op1),
4529 quotient);
4531 break;
4533 fail1:
4534 delete_insns_since (last);
4535 break;
4537 case FLOOR_DIV_EXPR:
4538 case FLOOR_MOD_EXPR:
4539 /* We will come here only for signed operations. */
4540 if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4542 unsigned HOST_WIDE_INT mh, ml;
4543 int pre_shift, lgup, post_shift;
4544 HOST_WIDE_INT d = INTVAL (op1);
4546 if (d > 0)
4548 /* We could just as easily deal with negative constants here,
4549 but it does not seem worth the trouble for GCC 2.6. */
4550 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4552 pre_shift = floor_log2 (d);
4553 if (rem_flag)
4555 unsigned HOST_WIDE_INT mask
4556 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4557 remainder = expand_binop
4558 (compute_mode, and_optab, op0,
4559 gen_int_mode (mask, compute_mode),
4560 remainder, 0, OPTAB_LIB_WIDEN);
4561 if (remainder)
4562 return gen_lowpart (mode, remainder);
4564 quotient = expand_shift
4565 (RSHIFT_EXPR, compute_mode, op0,
4566 pre_shift, tquotient, 0);
4568 else
4570 rtx t1, t2, t3, t4;
4572 mh = choose_multiplier (d, size, size - 1,
4573 &ml, &post_shift, &lgup);
4574 gcc_assert (!mh);
4576 if (post_shift < BITS_PER_WORD
4577 && size - 1 < BITS_PER_WORD)
4579 t1 = expand_shift
4580 (RSHIFT_EXPR, compute_mode, op0,
4581 size - 1, NULL_RTX, 0);
4582 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4583 NULL_RTX, 0, OPTAB_WIDEN);
4584 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4585 + shift_cost (speed, compute_mode, size - 1)
4586 + 2 * add_cost (speed, compute_mode));
4587 t3 = expmed_mult_highpart
4588 (compute_mode, t2, gen_int_mode (ml, compute_mode),
4589 NULL_RTX, 1, max_cost - extra_cost);
4590 if (t3 != 0)
4592 t4 = expand_shift
4593 (RSHIFT_EXPR, compute_mode, t3,
4594 post_shift, NULL_RTX, 1);
4595 quotient = expand_binop (compute_mode, xor_optab,
4596 t4, t1, tquotient, 0,
4597 OPTAB_WIDEN);
4602 else
4604 rtx nsign, t1, t2, t3, t4;
4605 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4606 op0, constm1_rtx), NULL_RTX);
4607 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4608 0, OPTAB_WIDEN);
4609 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
4610 size - 1, NULL_RTX, 0);
4611 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4612 NULL_RTX);
4613 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4614 NULL_RTX, 0);
4615 if (t4)
4617 rtx t5;
4618 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4619 NULL_RTX, 0);
4620 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4621 t4, t5),
4622 tquotient);
4627 if (quotient != 0)
4628 break;
4629 delete_insns_since (last);
4631 /* Try using an instruction that produces both the quotient and
4632 remainder, using truncation. We can easily compensate the quotient
4633 or remainder to get floor rounding, once we have the remainder.
4634 Notice that we also compute the final remainder value here,
4635 and return the result right away. */
4636 if (target == 0 || GET_MODE (target) != compute_mode)
4637 target = gen_reg_rtx (compute_mode);
4639 if (rem_flag)
4641 remainder
4642 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4643 quotient = gen_reg_rtx (compute_mode);
4645 else
4647 quotient
4648 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4649 remainder = gen_reg_rtx (compute_mode);
4652 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4653 quotient, remainder, 0))
4655 /* This could be computed with a branch-less sequence.
4656 Save that for later. */
4657 rtx tem;
4658 rtx_code_label *label = gen_label_rtx ();
4659 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4660 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4661 NULL_RTX, 0, OPTAB_WIDEN);
4662 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4663 expand_dec (quotient, const1_rtx);
4664 expand_inc (remainder, op1);
4665 emit_label (label);
4666 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4669 /* No luck with division elimination or divmod. Have to do it
4670 by conditionally adjusting op0 *and* the result. */
4672 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4673 rtx adjusted_op0;
4674 rtx tem;
4676 quotient = gen_reg_rtx (compute_mode);
4677 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4678 label1 = gen_label_rtx ();
4679 label2 = gen_label_rtx ();
4680 label3 = gen_label_rtx ();
4681 label4 = gen_label_rtx ();
4682 label5 = gen_label_rtx ();
4683 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4684 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4685 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4686 quotient, 0, OPTAB_LIB_WIDEN);
4687 if (tem != quotient)
4688 emit_move_insn (quotient, tem);
4689 emit_jump_insn (targetm.gen_jump (label5));
4690 emit_barrier ();
4691 emit_label (label1);
4692 expand_inc (adjusted_op0, const1_rtx);
4693 emit_jump_insn (targetm.gen_jump (label4));
4694 emit_barrier ();
4695 emit_label (label2);
4696 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4697 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4698 quotient, 0, OPTAB_LIB_WIDEN);
4699 if (tem != quotient)
4700 emit_move_insn (quotient, tem);
4701 emit_jump_insn (targetm.gen_jump (label5));
4702 emit_barrier ();
4703 emit_label (label3);
4704 expand_dec (adjusted_op0, const1_rtx);
4705 emit_label (label4);
4706 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4707 quotient, 0, OPTAB_LIB_WIDEN);
4708 if (tem != quotient)
4709 emit_move_insn (quotient, tem);
4710 expand_dec (quotient, const1_rtx);
4711 emit_label (label5);
4713 break;
4715 case CEIL_DIV_EXPR:
4716 case CEIL_MOD_EXPR:
4717 if (unsignedp)
4719 if (op1_is_constant
4720 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4721 && (size <= HOST_BITS_PER_WIDE_INT
4722 || INTVAL (op1) >= 0))
4724 rtx t1, t2, t3;
4725 unsigned HOST_WIDE_INT d = INTVAL (op1);
4726 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4727 floor_log2 (d), tquotient, 1);
4728 t2 = expand_binop (compute_mode, and_optab, op0,
4729 gen_int_mode (d - 1, compute_mode),
4730 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4731 t3 = gen_reg_rtx (compute_mode);
4732 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4733 compute_mode, 1, 1);
4734 if (t3 == 0)
4736 rtx_code_label *lab;
4737 lab = gen_label_rtx ();
4738 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4739 expand_inc (t1, const1_rtx);
4740 emit_label (lab);
4741 quotient = t1;
4743 else
4744 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4745 t1, t3),
4746 tquotient);
4747 break;
4750 /* Try using an instruction that produces both the quotient and
4751 remainder, using truncation. We can easily compensate the
4752 quotient or remainder to get ceiling rounding, once we have the
4753 remainder. Notice that we also compute the final remainder
4754 value here, and return the result right away. */
4755 if (target == 0 || GET_MODE (target) != compute_mode)
4756 target = gen_reg_rtx (compute_mode);
4758 if (rem_flag)
4760 remainder = (REG_P (target)
4761 ? target : gen_reg_rtx (compute_mode));
4762 quotient = gen_reg_rtx (compute_mode);
4764 else
4766 quotient = (REG_P (target)
4767 ? target : gen_reg_rtx (compute_mode));
4768 remainder = gen_reg_rtx (compute_mode);
4771 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4772 remainder, 1))
4774 /* This could be computed with a branch-less sequence.
4775 Save that for later. */
4776 rtx_code_label *label = gen_label_rtx ();
4777 do_cmp_and_jump (remainder, const0_rtx, EQ,
4778 compute_mode, label);
4779 expand_inc (quotient, const1_rtx);
4780 expand_dec (remainder, op1);
4781 emit_label (label);
4782 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4785 /* No luck with division elimination or divmod. Have to do it
4786 by conditionally adjusting op0 *and* the result. */
4788 rtx_code_label *label1, *label2;
4789 rtx adjusted_op0, tem;
4791 quotient = gen_reg_rtx (compute_mode);
4792 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4793 label1 = gen_label_rtx ();
4794 label2 = gen_label_rtx ();
4795 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4796 compute_mode, label1);
4797 emit_move_insn (quotient, const0_rtx);
4798 emit_jump_insn (targetm.gen_jump (label2));
4799 emit_barrier ();
4800 emit_label (label1);
4801 expand_dec (adjusted_op0, const1_rtx);
4802 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4803 quotient, 1, OPTAB_LIB_WIDEN);
4804 if (tem != quotient)
4805 emit_move_insn (quotient, tem);
4806 expand_inc (quotient, const1_rtx);
4807 emit_label (label2);
4810 else /* signed */
4812 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4813 && INTVAL (op1) >= 0)
4815 /* This is extremely similar to the code for the unsigned case
4816 above. For 2.7 we should merge these variants, but for
4817 2.6.1 I don't want to touch the code for unsigned since that
4818 gets used in C. The signed case will only be used by other
4819 languages (Ada). */
4821 rtx t1, t2, t3;
4822 unsigned HOST_WIDE_INT d = INTVAL (op1);
4823 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4824 floor_log2 (d), tquotient, 0);
4825 t2 = expand_binop (compute_mode, and_optab, op0,
4826 gen_int_mode (d - 1, compute_mode),
4827 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4828 t3 = gen_reg_rtx (compute_mode);
4829 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4830 compute_mode, 1, 1);
4831 if (t3 == 0)
4833 rtx_code_label *lab;
4834 lab = gen_label_rtx ();
4835 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4836 expand_inc (t1, const1_rtx);
4837 emit_label (lab);
4838 quotient = t1;
4840 else
4841 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4842 t1, t3),
4843 tquotient);
4844 break;
4847 /* Try using an instruction that produces both the quotient and
4848 remainder, using truncation. We can easily compensate the
4849 quotient or remainder to get ceiling rounding, once we have the
4850 remainder. Notice that we also compute the final remainder
4851 value here, and return the result right away. */
4852 if (target == 0 || GET_MODE (target) != compute_mode)
4853 target = gen_reg_rtx (compute_mode);
4854 if (rem_flag)
4856 remainder = (REG_P (target)
4857 ? target : gen_reg_rtx (compute_mode));
4858 quotient = gen_reg_rtx (compute_mode);
4860 else
4862 quotient = (REG_P (target)
4863 ? target : gen_reg_rtx (compute_mode));
4864 remainder = gen_reg_rtx (compute_mode);
4867 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4868 remainder, 0))
4870 /* This could be computed with a branch-less sequence.
4871 Save that for later. */
4872 rtx tem;
4873 rtx_code_label *label = gen_label_rtx ();
4874 do_cmp_and_jump (remainder, const0_rtx, EQ,
4875 compute_mode, label);
4876 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4877 NULL_RTX, 0, OPTAB_WIDEN);
4878 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4879 expand_inc (quotient, const1_rtx);
4880 expand_dec (remainder, op1);
4881 emit_label (label);
4882 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4885 /* No luck with division elimination or divmod. Have to do it
4886 by conditionally adjusting op0 *and* the result. */
4888 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4889 rtx adjusted_op0;
4890 rtx tem;
4892 quotient = gen_reg_rtx (compute_mode);
4893 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4894 label1 = gen_label_rtx ();
4895 label2 = gen_label_rtx ();
4896 label3 = gen_label_rtx ();
4897 label4 = gen_label_rtx ();
4898 label5 = gen_label_rtx ();
4899 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4900 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4901 compute_mode, label1);
4902 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4903 quotient, 0, OPTAB_LIB_WIDEN);
4904 if (tem != quotient)
4905 emit_move_insn (quotient, tem);
4906 emit_jump_insn (targetm.gen_jump (label5));
4907 emit_barrier ();
4908 emit_label (label1);
4909 expand_dec (adjusted_op0, const1_rtx);
4910 emit_jump_insn (targetm.gen_jump (label4));
4911 emit_barrier ();
4912 emit_label (label2);
4913 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4914 compute_mode, label3);
4915 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4916 quotient, 0, OPTAB_LIB_WIDEN);
4917 if (tem != quotient)
4918 emit_move_insn (quotient, tem);
4919 emit_jump_insn (targetm.gen_jump (label5));
4920 emit_barrier ();
4921 emit_label (label3);
4922 expand_inc (adjusted_op0, const1_rtx);
4923 emit_label (label4);
4924 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4925 quotient, 0, OPTAB_LIB_WIDEN);
4926 if (tem != quotient)
4927 emit_move_insn (quotient, tem);
4928 expand_inc (quotient, const1_rtx);
4929 emit_label (label5);
4932 break;
4934 case EXACT_DIV_EXPR:
4935 if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4937 HOST_WIDE_INT d = INTVAL (op1);
4938 unsigned HOST_WIDE_INT ml;
4939 int pre_shift;
4940 rtx t1;
4942 pre_shift = ctz_or_zero (d);
4943 ml = invert_mod2n (d >> pre_shift, size);
4944 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4945 pre_shift, NULL_RTX, unsignedp);
4946 quotient = expand_mult (compute_mode, t1,
4947 gen_int_mode (ml, compute_mode),
4948 NULL_RTX, 1);
4950 insn = get_last_insn ();
4951 set_dst_reg_note (insn, REG_EQUAL,
4952 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4953 compute_mode, op0, op1),
4954 quotient);
4956 break;
4958 case ROUND_DIV_EXPR:
4959 case ROUND_MOD_EXPR:
4960 if (unsignedp)
4962 rtx tem;
4963 rtx_code_label *label;
4964 label = gen_label_rtx ();
4965 quotient = gen_reg_rtx (compute_mode);
4966 remainder = gen_reg_rtx (compute_mode);
4967 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4969 rtx tem;
4970 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4971 quotient, 1, OPTAB_LIB_WIDEN);
4972 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4973 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4974 remainder, 1, OPTAB_LIB_WIDEN);
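/* Round the quotient to nearest: when the remainder is greater than
   (op1 - 1) / 2, i.e. at least half the divisor with ties rounding up,
   bump the quotient and take op1 back out of the remainder; the LEU
   branch below skips that adjustment otherwise.  */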
4976 tem = plus_constant (compute_mode, op1, -1);
4977 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4978 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4979 expand_inc (quotient, const1_rtx);
4980 expand_dec (remainder, op1);
4981 emit_label (label);
4983 else
4985 rtx abs_rem, abs_op1, tem, mask;
4986 rtx_code_label *label;
4987 label = gen_label_rtx ();
4988 quotient = gen_reg_rtx (compute_mode);
4989 remainder = gen_reg_rtx (compute_mode);
4990 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4992 rtx tem;
4993 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4994 quotient, 0, OPTAB_LIB_WIDEN);
4995 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4996 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4997 remainder, 0, OPTAB_LIB_WIDEN);
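/* Round to nearest, halves away from zero: adjust only when
   2 * abs (remainder) >= abs (op1).  MASK below is 0 or -1 according to
   the sign of op0 ^ op1, so the two xor/sub pairs turn it into +1 or -1
   and into op1 or -op1, moving the quotient and remainder in the proper
   direction without any further branches.  */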
4999 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
5000 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
5001 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
5002 1, NULL_RTX, 1);
5003 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
5004 tem = expand_binop (compute_mode, xor_optab, op0, op1,
5005 NULL_RTX, 0, OPTAB_WIDEN);
5006 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
5007 size - 1, NULL_RTX, 0);
5008 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
5009 NULL_RTX, 0, OPTAB_WIDEN);
5010 tem = expand_binop (compute_mode, sub_optab, tem, mask,
5011 NULL_RTX, 0, OPTAB_WIDEN);
5012 expand_inc (quotient, tem);
5013 tem = expand_binop (compute_mode, xor_optab, mask, op1,
5014 NULL_RTX, 0, OPTAB_WIDEN);
5015 tem = expand_binop (compute_mode, sub_optab, tem, mask,
5016 NULL_RTX, 0, OPTAB_WIDEN);
5017 expand_dec (remainder, tem);
5018 emit_label (label);
5020 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5022 default:
5023 gcc_unreachable ();
5026 if (quotient == 0)
5028 if (target && GET_MODE (target) != compute_mode)
5029 target = 0;
5031 if (rem_flag)
5033 /* Try to produce the remainder without producing the quotient.
5034 If we seem to have a divmod pattern that does not require widening,
5035 don't try widening here. We should really have a WIDEN argument
5036 to expand_twoval_binop, since what we'd really like to do here is
5037 1) try a mod insn in compute_mode
5038 2) try a divmod insn in compute_mode
5039 3) try a div insn in compute_mode and multiply-subtract to get
5040 remainder
5041 4) try the same things with widening allowed. */
5042 remainder
5043 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5044 op0, op1, target,
5045 unsignedp,
5046 ((optab_handler (optab2, compute_mode)
5047 != CODE_FOR_nothing)
5048 ? OPTAB_DIRECT : OPTAB_WIDEN));
5049 if (remainder == 0)
5051 /* No luck there. Can we do remainder and divide at once
5052 without a library call? */
5053 remainder = gen_reg_rtx (compute_mode);
5054 if (! expand_twoval_binop ((unsignedp
5055 ? udivmod_optab
5056 : sdivmod_optab),
5057 op0, op1,
5058 NULL_RTX, remainder, unsignedp))
5059 remainder = 0;
5062 if (remainder)
5063 return gen_lowpart (mode, remainder);
5066 /* Produce the quotient. Try a quotient insn, but not a library call.
5067 If we have a divmod in this mode, use it in preference to widening
5068 the div (for this test we assume it will not fail). Note that optab2
5069 is set to the one of the two optabs that the call below will use. */
5070 quotient
5071 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5072 op0, op1, rem_flag ? NULL_RTX : target,
5073 unsignedp,
5074 ((optab_handler (optab2, compute_mode)
5075 != CODE_FOR_nothing)
5076 ? OPTAB_DIRECT : OPTAB_WIDEN));
5078 if (quotient == 0)
5080 /* No luck there. Try a quotient-and-remainder insn,
5081 keeping the quotient alone. */
5082 quotient = gen_reg_rtx (compute_mode);
5083 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5084 op0, op1,
5085 quotient, NULL_RTX, unsignedp))
5087 quotient = 0;
5088 if (! rem_flag)
5089 /* Still no luck. If we are not computing the remainder,
5090 use a library call for the quotient. */
5091 quotient = sign_expand_binop (compute_mode,
5092 udiv_optab, sdiv_optab,
5093 op0, op1, target,
5094 unsignedp, OPTAB_LIB_WIDEN);
5099 if (rem_flag)
5101 if (target && GET_MODE (target) != compute_mode)
5102 target = 0;
5104 if (quotient == 0)
5106 /* No divide instruction either. Use library for remainder. */
5107 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5108 op0, op1, target,
5109 unsignedp, OPTAB_LIB_WIDEN);
5110 /* No remainder function. Try a quotient-and-remainder
5111 function, keeping the remainder. */
5112 if (!remainder)
5114 remainder = gen_reg_rtx (compute_mode);
5115 if (!expand_twoval_binop_libfunc
5116 (unsignedp ? udivmod_optab : sdivmod_optab,
5117 op0, op1,
5118 NULL_RTX, remainder,
5119 unsignedp ? UMOD : MOD))
5120 remainder = NULL_RTX;
5123 else
5125 /* We divided. Now finish doing X - Y * (X / Y). */
5126 remainder = expand_mult (compute_mode, quotient, op1,
5127 NULL_RTX, unsignedp);
5128 remainder = expand_binop (compute_mode, sub_optab, op0,
5129 remainder, target, unsignedp,
5130 OPTAB_LIB_WIDEN);
5134 return gen_lowpart (mode, rem_flag ? remainder : quotient);
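/* Illustrative host-side sketch (the helper name is made up and a 64-bit
   HOST_WIDE_INT is assumed): for an unsigned TRUNC_DIV_EXPR by 7,
   choose_multiplier yields ml = 0x24924925 with a set carry bit (mh != 0)
   and post_shift = 3, so the add-and-shift fixup path above is used.
   Spelled out on 32-bit host values:  */

static inline unsigned int
example_udiv7_by_high_multiply (unsigned int x)
{
  unsigned int t1
    = (unsigned int) (((unsigned HOST_WIDE_INT) x * 0x24924925) >> 32);
  unsigned int t2 = (x - t1) >> 1;
  return (t1 + t2) >> 2;	/* >> (post_shift - 1); == x / 7 for 32-bit x.  */
}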
5137 /* Return a tree node with data type TYPE, describing the value of X.
5138 Usually this is a VAR_DECL, if there is no obvious better choice.
5139 X may be an expression; however, we only support those expressions
5140 generated by loop.c. */
5142 tree
5143 make_tree (tree type, rtx x)
5145 tree t;
5147 switch (GET_CODE (x))
5149 case CONST_INT:
5150 case CONST_WIDE_INT:
5151 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5152 return t;
5154 case CONST_DOUBLE:
5155 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5156 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5157 t = wide_int_to_tree (type,
5158 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5159 HOST_BITS_PER_WIDE_INT * 2));
5160 else
5161 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5163 return t;
5165 case CONST_VECTOR:
5167 int units = CONST_VECTOR_NUNITS (x);
5168 tree itype = TREE_TYPE (type);
5169 tree *elts;
5170 int i;
5172 /* Build a tree with vector elements. */
5173 elts = XALLOCAVEC (tree, units);
5174 for (i = units - 1; i >= 0; --i)
5176 rtx elt = CONST_VECTOR_ELT (x, i);
5177 elts[i] = make_tree (itype, elt);
5180 return build_vector (type, elts);
5183 case PLUS:
5184 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5185 make_tree (type, XEXP (x, 1)));
5187 case MINUS:
5188 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5189 make_tree (type, XEXP (x, 1)));
5191 case NEG:
5192 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5194 case MULT:
5195 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5196 make_tree (type, XEXP (x, 1)));
5198 case ASHIFT:
5199 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5200 make_tree (type, XEXP (x, 1)));
5202 case LSHIFTRT:
5203 t = unsigned_type_for (type);
5204 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5205 make_tree (t, XEXP (x, 0)),
5206 make_tree (type, XEXP (x, 1))));
5208 case ASHIFTRT:
5209 t = signed_type_for (type);
5210 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5211 make_tree (t, XEXP (x, 0)),
5212 make_tree (type, XEXP (x, 1))));
5214 case DIV:
5215 if (TREE_CODE (type) != REAL_TYPE)
5216 t = signed_type_for (type);
5217 else
5218 t = type;
5220 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5221 make_tree (t, XEXP (x, 0)),
5222 make_tree (t, XEXP (x, 1))));
5223 case UDIV:
5224 t = unsigned_type_for (type);
5225 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5226 make_tree (t, XEXP (x, 0)),
5227 make_tree (t, XEXP (x, 1))));
5229 case SIGN_EXTEND:
5230 case ZERO_EXTEND:
5231 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5232 GET_CODE (x) == ZERO_EXTEND);
5233 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5235 case CONST:
5236 return make_tree (type, XEXP (x, 0));
5238 case SYMBOL_REF:
5239 t = SYMBOL_REF_DECL (x);
5240 if (t)
5241 return fold_convert (type, build_fold_addr_expr (t));
5242 /* fall through. */
5244 default:
5245 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5247 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5248 address mode to pointer mode. */
5249 if (POINTER_TYPE_P (type))
5250 x = convert_memory_address_addr_space
5251 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5253 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5254 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5255 t->decl_with_rtl.rtl = x;
5257 return t;
5261 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5262 and returning TARGET.
5264 If TARGET is 0, a pseudo-register or constant is returned. */
5267 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5269 rtx tem = 0;
5271 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5272 tem = simplify_binary_operation (AND, mode, op0, op1);
5273 if (tem == 0)
5274 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5276 if (target == 0)
5277 target = tem;
5278 else if (tem != target)
5279 emit_move_insn (target, tem);
5280 return target;
5283 /* Helper function for emit_store_flag. */
5284 static rtx
5285 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5286 machine_mode mode, machine_mode compare_mode,
5287 int unsignedp, rtx x, rtx y, int normalizep,
5288 machine_mode target_mode)
5290 struct expand_operand ops[4];
5291 rtx op0, comparison, subtarget;
5292 rtx_insn *last;
5293 machine_mode result_mode = targetm.cstore_mode (icode);
5295 last = get_last_insn ();
5296 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5297 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5298 if (!x || !y)
5300 delete_insns_since (last);
5301 return NULL_RTX;
5304 if (target_mode == VOIDmode)
5305 target_mode = result_mode;
5306 if (!target)
5307 target = gen_reg_rtx (target_mode);
5309 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5311 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5312 create_fixed_operand (&ops[1], comparison);
5313 create_fixed_operand (&ops[2], x);
5314 create_fixed_operand (&ops[3], y);
5315 if (!maybe_expand_insn (icode, 4, ops))
5317 delete_insns_since (last);
5318 return NULL_RTX;
5320 subtarget = ops[0].value;
5322 /* If we are converting to a wider mode, first convert to
5323 TARGET_MODE, then normalize. This produces better combining
5324 opportunities on machines that have a SIGN_EXTRACT when we are
5325 testing a single bit. This mostly benefits the 68k.
5327 If STORE_FLAG_VALUE does not have the sign bit set when
5328 interpreted in MODE, we can do this conversion as unsigned, which
5329 is usually more efficient. */
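/* For instance, if the cstore pattern produces its result in, say, QImode
   and STORE_FLAG_VALUE is 1, the sign bit of the result is known to be
   clear, so the widening conversion can be done as a cheaper
   zero-extension; a target whose STORE_FLAG_VALUE is -1 needs a
   sign-extending conversion instead.  */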
5330 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5332 convert_move (target, subtarget,
5333 val_signbit_known_clear_p (result_mode,
5334 STORE_FLAG_VALUE));
5335 op0 = target;
5336 result_mode = target_mode;
5338 else
5339 op0 = subtarget;
5341 /* If we want to keep subexpressions around, don't reuse our last
5342 target. */
5343 if (optimize)
5344 subtarget = 0;
5346 /* Now normalize to the proper value in MODE. Sometimes we don't
5347 have to do anything. */
5348 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5349 ;
5350 /* STORE_FLAG_VALUE might be the most negative number, so write
5351 the comparison this way to avoid a compile-time warning. */
5352 else if (- normalizep == STORE_FLAG_VALUE)
5353 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5355 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5356 it hard to use a value of just the sign bit due to ANSI integer
5357 constant typing rules. */
5358 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5359 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5360 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5361 normalizep == 1);
5362 else
5364 gcc_assert (STORE_FLAG_VALUE & 1);
5366 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5367 if (normalizep == -1)
5368 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5371 /* If we were converting to a smaller mode, do the conversion now. */
5372 if (target_mode != result_mode)
5374 convert_move (target, op0, 0);
5375 return target;
5377 else
5378 return op0;
5382 /* A subroutine of emit_store_flag only including "tricks" that do not
5383 need a recursive call. These are kept separate to avoid infinite
5384 loops. */
5386 static rtx
5387 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5388 machine_mode mode, int unsignedp, int normalizep,
5389 machine_mode target_mode)
5391 rtx subtarget;
5392 enum insn_code icode;
5393 machine_mode compare_mode;
5394 enum mode_class mclass;
5395 enum rtx_code scode;
5397 if (unsignedp)
5398 code = unsigned_condition (code);
5399 scode = swap_condition (code);
5401 /* If one operand is constant, make it the second one. Only do this
5402 if the other operand is not constant as well. */
5404 if (swap_commutative_operands_p (op0, op1))
5406 std::swap (op0, op1);
5407 code = swap_condition (code);
5410 if (mode == VOIDmode)
5411 mode = GET_MODE (op0);
5413 /* For some comparisons with 1 and -1, we can convert this to
5414 comparisons with zero. This will often produce more opportunities for
5415 store-flag insns. */
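/* For example, "x < 1" is the same condition as "x <= 0", and the
   unsigned "x >= 1" is the same as "x != 0", so the rewritten comparison
   can use a plain test against zero.  */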
5417 switch (code)
5419 case LT:
5420 if (op1 == const1_rtx)
5421 op1 = const0_rtx, code = LE;
5422 break;
5423 case LE:
5424 if (op1 == constm1_rtx)
5425 op1 = const0_rtx, code = LT;
5426 break;
5427 case GE:
5428 if (op1 == const1_rtx)
5429 op1 = const0_rtx, code = GT;
5430 break;
5431 case GT:
5432 if (op1 == constm1_rtx)
5433 op1 = const0_rtx, code = GE;
5434 break;
5435 case GEU:
5436 if (op1 == const1_rtx)
5437 op1 = const0_rtx, code = NE;
5438 break;
5439 case LTU:
5440 if (op1 == const1_rtx)
5441 op1 = const0_rtx, code = EQ;
5442 break;
5443 default:
5444 break;
5447 /* If we are comparing a double-word integer with zero or -1, we can
5448 convert the comparison into one involving a single word. */
5449 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5450 && GET_MODE_CLASS (mode) == MODE_INT
5451 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5453 rtx tem;
5454 if ((code == EQ || code == NE)
5455 && (op1 == const0_rtx || op1 == constm1_rtx))
5457 rtx op00, op01;
5459 /* Do a logical OR or AND of the two words and compare the
5460 result. */
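/* That is, for a double-word value split into words HI and LO,
   (HI == 0 && LO == 0) is equivalent to (HI | LO) == 0 and
   (HI == -1 && LO == -1) is equivalent to (HI & LO) == -1, so a single
   word-mode comparison against OP1 suffices.  */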
5461 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5462 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5463 tem = expand_binop (word_mode,
5464 op1 == const0_rtx ? ior_optab : and_optab,
5465 op00, op01, NULL_RTX, unsignedp,
5466 OPTAB_DIRECT);
5468 if (tem != 0)
5469 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5470 unsignedp, normalizep);
5472 else if ((code == LT || code == GE) && op1 == const0_rtx)
5474 rtx op0h;
5476 /* If testing the sign bit, can just test on high word. */
5477 op0h = simplify_gen_subreg (word_mode, op0, mode,
5478 subreg_highpart_offset (word_mode,
5479 mode));
5480 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5481 unsignedp, normalizep);
5483 else
5484 tem = NULL_RTX;
5486 if (tem)
5488 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5489 return tem;
5490 if (!target)
5491 target = gen_reg_rtx (target_mode);
5493 convert_move (target, tem,
5494 !val_signbit_known_set_p (word_mode,
5495 (normalizep ? normalizep
5496 : STORE_FLAG_VALUE)));
5497 return target;
5501 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5502 complement of A (for GE) and shifting the sign bit to the low bit. */
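/* The trick is that "A < 0" is exactly the sign bit of A, and "A >= 0"
   is exactly the sign bit of ~A, so either test reduces to moving the
   sign bit down to bit 0 with the shift emitted below.  */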
5503 if (op1 == const0_rtx && (code == LT || code == GE)
5504 && GET_MODE_CLASS (mode) == MODE_INT
5505 && (normalizep || STORE_FLAG_VALUE == 1
5506 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5508 subtarget = target;
5510 if (!target)
5511 target_mode = mode;
5513 /* If the result is to be wider than OP0, it is best to convert it
5514 first. If it is to be narrower, it is *incorrect* to convert it
5515 first. */
5516 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5518 op0 = convert_modes (target_mode, mode, op0, 0);
5519 mode = target_mode;
5522 if (target_mode != mode)
5523 subtarget = 0;
5525 if (code == GE)
5526 op0 = expand_unop (mode, one_cmpl_optab, op0,
5527 ((STORE_FLAG_VALUE == 1 || normalizep)
5528 ? 0 : subtarget), 0);
5530 if (STORE_FLAG_VALUE == 1 || normalizep)
5531 /* If we are supposed to produce a 0/1 value, we want to do
5532 a logical shift from the sign bit to the low-order bit; for
5533 a -1/0 value, we do an arithmetic shift. */
5534 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5535 GET_MODE_BITSIZE (mode) - 1,
5536 subtarget, normalizep != -1);
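/* E.g. in an 8-bit mode, shifting 0x80 right by 7 yields 1 with a
   logical shift and -1 (all ones) with an arithmetic shift, matching
   the requested 0/1 or 0/-1 encoding.  */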
5538 if (mode != target_mode)
5539 op0 = convert_modes (target_mode, mode, op0, 0);
5541 return op0;
5544 mclass = GET_MODE_CLASS (mode);
5545 FOR_EACH_MODE_FROM (compare_mode, mode)
5547 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5548 icode = optab_handler (cstore_optab, optab_mode);
5549 if (icode != CODE_FOR_nothing)
5551 do_pending_stack_adjust ();
5552 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5553 unsignedp, op0, op1, normalizep, target_mode);
5554 if (tem)
5555 return tem;
5557 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5559 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5560 unsignedp, op1, op0, normalizep, target_mode);
5561 if (tem)
5562 return tem;
5564 break;
5568 return 0;
5571 /* Subroutine of emit_store_flag that handles cases in which the operands
5572 are scalar integers. SUBTARGET is the target to use for temporary
5573 operations and TRUEVAL is the value to store when the condition is
5574 true. All other arguments are as for emit_store_flag. */
5576 static rtx
5577 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5578 rtx op1, machine_mode mode, int unsignedp,
5579 int normalizep, rtx trueval)
5581 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5582 rtx_insn *last = get_last_insn ();
5583 rtx tem;
5585 /* If this is an equality comparison of integers, we can try to exclusive-or
5586 (or subtract) the two operands and use a recursive call to try the
5587 comparison with zero. Don't do any of these cases if branches are
5588 very cheap. */
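/* This relies on the identities "x == y" <-> "(x ^ y) == 0" and, in
   wrapping arithmetic, "x == y" <-> "(x - y) == 0", so the recursive
   call only has to handle a comparison against zero.  */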
5590 if ((code == EQ || code == NE) && op1 != const0_rtx)
5592 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5593 OPTAB_WIDEN);
5595 if (tem == 0)
5596 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5597 OPTAB_WIDEN);
5598 if (tem != 0)
5599 tem = emit_store_flag (target, code, tem, const0_rtx,
5600 mode, unsignedp, normalizep);
5601 if (tem != 0)
5602 return tem;
5604 delete_insns_since (last);
5607 /* For integer comparisons, try the reverse comparison. However, for
5608 small X, and if we'd have to extend anyway, implementing "X != 0"
5609 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
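/* For instance, if X is a zero-extended unsigned char in a 32-bit
   register, (int) X is never negative, so -(int) X is negative exactly
   when X is nonzero; the arithmetic shift by 31 then produces -1 for
   "X != 0" and 0 otherwise, with no separate inversion step.  */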
5610 rtx_code rcode = reverse_condition (code);
5611 if (can_compare_p (rcode, mode, ccp_store_flag)
5612 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5613 && code == NE
5614 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5615 && op1 == const0_rtx))
5617 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5618 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5620 /* Again, for the reverse comparison, use either an addition or a XOR. */
5621 if (want_add
5622 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5623 optimize_insn_for_speed_p ()) == 0)
5625 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5626 STORE_FLAG_VALUE, target_mode);
5627 if (tem != 0)
5628 tem = expand_binop (target_mode, add_optab, tem,
5629 gen_int_mode (normalizep, target_mode),
5630 target, 0, OPTAB_WIDEN);
5632 else if (!want_add
5633 && rtx_cost (trueval, mode, XOR, 1,
5634 optimize_insn_for_speed_p ()) == 0)
5636 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5637 normalizep, target_mode);
5638 if (tem != 0)
5639 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5640 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5643 if (tem != 0)
5644 return tem;
5645 delete_insns_since (last);
5648 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5649 the constant zero. Reject all other comparisons at this point. Only
5650 do LE and GT if branches are expensive since they are expensive on
5651 2-operand machines. */
5653 if (op1 != const0_rtx
5654 || (code != EQ && code != NE
5655 && (BRANCH_COST (optimize_insn_for_speed_p (),
5656 false) <= 1 || (code != LE && code != GT))))
5657 return 0;
5659 /* Try to put the result of the comparison in the sign bit. Assume we can't
5660 do the necessary operation below. */
5662 tem = 0;
5664 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5665 the sign bit set. */
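/* Worked examples in an 8-bit mode: A = 3 gives 3 | 2 = 3 (sign bit
   clear, so A <= 0 is false); A = 0 gives 0 | 0xff = 0xff (sign bit
   set); any negative A already has the sign bit set, which the OR
   preserves.  */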
5667 if (code == LE)
5669 /* This is destructive, so SUBTARGET can't be OP0. */
5670 if (rtx_equal_p (subtarget, op0))
5671 subtarget = 0;
5673 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5674 OPTAB_WIDEN);
5675 if (tem)
5676 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5677 OPTAB_WIDEN);
5680 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5681 number of bits in the mode of OP0, minus one. */
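/* Worked examples in an 8-bit mode, where BITS is 7: A = 3 gives
   (3 >> 7) - 3 = -3 (sign bit set, so A > 0 is true); A = 0 gives
   0 - 0 = 0 (clear); A = -5 gives (-1) - (-5) = 4 (clear).  */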
5683 if (code == GT)
5685 if (rtx_equal_p (subtarget, op0))
5686 subtarget = 0;
5688 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5689 GET_MODE_BITSIZE (mode) - 1,
5690 subtarget, 0);
5691 if (tem)
5692 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5693 OPTAB_WIDEN);
5696 if (code == EQ || code == NE)
5698 /* For EQ or NE, one way to do the comparison is to apply an operation
5699 that converts the operand into a positive number if it is nonzero
5700 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5701 for NE we negate. This puts the result in the sign bit. Then we
5702 normalize with a shift, if needed.
5704 Two operations that can do the above actions are ABS and FFS, so try
5705 them. If that doesn't work, and MODE is smaller than a full word,
5706 we can use zero-extension to the wider mode (an unsigned conversion)
5707 as the operation. */
5709 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5710 that is compensated by the subsequent overflow when subtracting
5711 one / negating. */
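/* Worked example for EQ: abs (A) is at least 1 for any nonzero A other
   than INT_MIN, so abs (A) - 1 is nonnegative, while abs (0) - 1 wraps
   to -1; the sign bit is therefore set exactly when A == 0.  For NE,
   -abs (A) is negative exactly when A != 0.  The INT_MIN case works out
   because abs (INT_MIN) is INT_MIN, and INT_MIN - 1 overflows to
   INT_MAX (nonnegative) while -INT_MIN stays INT_MIN (negative).  */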
5713 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5714 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5715 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5716 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5717 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5719 tem = convert_modes (word_mode, mode, op0, 1);
5720 mode = word_mode;
5723 if (tem != 0)
5725 if (code == EQ)
5726 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5727 0, OPTAB_WIDEN);
5728 else
5729 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5732 /* If we couldn't do it that way, for NE we can "or" the two's complement
5733 of the value with itself. For EQ, we take the one's complement of
5734 that "or", which is an extra insn, so we only handle EQ if branches
5735 are expensive. */
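/* That is, (-A) | A has its sign bit set exactly when A is nonzero:
   for positive A the -A term is negative, for negative A the A term is,
   and for A == 0 the result is 0.  (A == INT_MIN still works, since
   -INT_MIN is INT_MIN.)  The one's complement then inverts the bit for
   the EQ case.  */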
5737 if (tem == 0
5738 && (code == NE
5739 || BRANCH_COST (optimize_insn_for_speed_p (),
5740 false) > 1))
5742 if (rtx_equal_p (subtarget, op0))
5743 subtarget = 0;
5745 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5746 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5747 OPTAB_WIDEN);
5749 if (tem && code == EQ)
5750 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5754 if (tem && normalizep)
5755 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5756 GET_MODE_BITSIZE (mode) - 1,
5757 subtarget, normalizep == 1);
5759 if (tem)
5761 if (!target)
5762 ;
5763 else if (GET_MODE (tem) != target_mode)
5765 convert_move (target, tem, 0);
5766 tem = target;
5768 else if (!subtarget)
5770 emit_move_insn (target, tem);
5771 tem = target;
5774 else
5775 delete_insns_since (last);
5777 return tem;
5780 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5781 and storing in TARGET. Normally return TARGET.
5782 Return 0 if that cannot be done.
5784 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5785 it is VOIDmode, they cannot both be CONST_INT.
5787 UNSIGNEDP is for the case where we have to widen the operands
5788 to perform the operation. It says to use zero-extension.
5790 NORMALIZEP is 1 if we should convert the result to be either zero
5791 or one. NORMALIZEP is -1 if we should convert the result to be
5792 either zero or -1. If NORMALIZEP is zero, the result will be left
5793 "raw" out of the scc insn. */
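/* For example, a caller wanting a 0/1 flag for "x == 0" in SImode might
   write (illustrative only):

     rtx flag = emit_store_flag (NULL_RTX, EQ, x, const0_rtx,
                                 SImode, 0, 1);

   and must be prepared to fall back to a compare-and-branch sequence
   when the result is 0.  */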
5795 rtx
5796 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5797 machine_mode mode, int unsignedp, int normalizep)
5799 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5800 enum rtx_code rcode;
5801 rtx subtarget;
5802 rtx tem, trueval;
5803 rtx_insn *last;
5805 /* If we compare constants, we shouldn't use a store-flag operation,
5806 but a constant load. We can get there via the vanilla route that
5807 usually generates a compare-branch sequence, but will in this case
5808 fold the comparison to a constant, and thus elide the branch. */
5809 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5810 return NULL_RTX;
5812 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5813 target_mode);
5814 if (tem)
5815 return tem;
5817 /* If we reached here, we can't do this with a scc insn, however there
5818 are some comparisons that can be done in other ways. Don't do any
5819 of these cases if branches are very cheap. */
5820 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5821 return 0;
5823 /* See what we need to return. We can only return a 1, -1, or the
5824 sign bit. */
5826 if (normalizep == 0)
5828 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5829 normalizep = STORE_FLAG_VALUE;
5831 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5832 ;
5833 else
5834 return 0;
5837 last = get_last_insn ();
5839 /* If optimizing, use different pseudo registers for each insn, instead
5840 of reusing the same pseudo. This leads to better CSE, but slows
5841 down the compiler, since there are more pseudos. */
5842 subtarget = (!optimize
5843 && (target_mode == mode)) ? target : NULL_RTX;
5844 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5846 /* For floating-point comparisons, try the reverse comparison or try
5847 changing the "orderedness" of the comparison. */
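/* For example, when NaNs need not be honored, "a LTGT b" is simply
   "a != b", and an unsupported comparison can be computed through its
   reverse and then flipped with a cheap XOR or ADD, as done below.  */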
5848 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5850 enum rtx_code first_code;
5851 bool and_them;
5853 rcode = reverse_condition_maybe_unordered (code);
5854 if (can_compare_p (rcode, mode, ccp_store_flag)
5855 && (code == ORDERED || code == UNORDERED
5856 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5857 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5859 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5860 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5862 /* For the reverse comparison, use either an addition or a XOR. */
5863 if (want_add
5864 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5865 optimize_insn_for_speed_p ()) == 0)
5867 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5868 STORE_FLAG_VALUE, target_mode);
5869 if (tem)
5870 return expand_binop (target_mode, add_optab, tem,
5871 gen_int_mode (normalizep, target_mode),
5872 target, 0, OPTAB_WIDEN);
5874 else if (!want_add
5875 && rtx_cost (trueval, mode, XOR, 1,
5876 optimize_insn_for_speed_p ()) == 0)
5878 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5879 normalizep, target_mode);
5880 if (tem)
5881 return expand_binop (target_mode, xor_optab, tem, trueval,
5882 target, INTVAL (trueval) >= 0,
5883 OPTAB_WIDEN);
5887 delete_insns_since (last);
5889 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5890 if (code == ORDERED || code == UNORDERED)
5891 return 0;
5893 and_them = split_comparison (code, mode, &first_code, &code);
5895 /* If there are no NaNs, the first comparison should always fall through.
5896 Effectively change the comparison to the other one. */
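/* For example, an ordered "a < b" can be split into "ordered (a, b)"
   ANDed with an unordered-or-less test; when NaNs cannot occur the
   ordered half is vacuously true, so only the other half needs to be
   emitted.  */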
5897 if (!HONOR_NANS (mode))
5899 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5900 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5901 target_mode);
5904 if (!HAVE_conditional_move)
5905 return 0;
5907 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5908 conditional move. */
5909 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5910 normalizep, target_mode);
5911 if (tem == 0)
5912 return 0;
5914 if (and_them)
5915 tem = emit_conditional_move (target, code, op0, op1, mode,
5916 tem, const0_rtx, GET_MODE (tem), 0);
5917 else
5918 tem = emit_conditional_move (target, code, op0, op1, mode,
5919 trueval, tem, GET_MODE (tem), 0);
5921 if (tem == 0)
5922 delete_insns_since (last);
5923 return tem;
5926 /* The remaining tricks only apply to integer comparisons. */
5928 if (GET_MODE_CLASS (mode) == MODE_INT)
5929 return emit_store_flag_int (target, subtarget, code, op0, op1, mode,
5930 unsignedp, normalizep, trueval);
5932 return 0;
5935 /* Like emit_store_flag, but always succeeds. */
5937 rtx
5938 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5939 machine_mode mode, int unsignedp, int normalizep)
5941 rtx tem;
5942 rtx_code_label *label;
5943 rtx trueval, falseval;
5945 /* First see if emit_store_flag can do the job. */
5946 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5947 if (tem != 0)
5948 return tem;
5950 if (!target)
5951 target = gen_reg_rtx (word_mode);
5953 /* If this failed, we have to do this with set/compare/jump/set code.
5954 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
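/* When OP0 is already the target register, the emitted sequence is
   conceptually (illustrative only):

       if (target == 0) goto done;
       target = TRUEVAL;
     done:

   which leaves the zero in place and overwrites nonzero values with
   TRUEVAL.  */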
5955 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5956 if (code == NE
5957 && GET_MODE_CLASS (mode) == MODE_INT
5958 && REG_P (target)
5959 && op0 == target
5960 && op1 == const0_rtx)
5962 label = gen_label_rtx ();
5963 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5964 NULL_RTX, NULL, label,
5965 profile_probability::uninitialized ());
5966 emit_move_insn (target, trueval);
5967 emit_label (label);
5968 return target;
5971 if (!REG_P (target)
5972 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5973 target = gen_reg_rtx (GET_MODE (target));
5975 /* Jump in the right direction if the target cannot implement CODE
5976 but can jump on its reverse condition. */
5977 falseval = const0_rtx;
5978 if (! can_compare_p (code, mode, ccp_jump)
5979 && (! FLOAT_MODE_P (mode)
5980 || code == ORDERED || code == UNORDERED
5981 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5982 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5984 enum rtx_code rcode;
5985 if (FLOAT_MODE_P (mode))
5986 rcode = reverse_condition_maybe_unordered (code);
5987 else
5988 rcode = reverse_condition (code);
5990 /* Canonicalize to UNORDERED for the libcall. */
5991 if (can_compare_p (rcode, mode, ccp_jump)
5992 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5994 falseval = trueval;
5995 trueval = const0_rtx;
5996 code = rcode;
6000 emit_move_insn (target, trueval);
6001 label = gen_label_rtx ();
6002 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6003 label, profile_probability::uninitialized ());
6005 emit_move_insn (target, falseval);
6006 emit_label (label);
6008 return target;
6011 /* Perform possibly multi-word comparison and conditional jump to LABEL
6012 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
6013 now a thin wrapper around do_compare_rtx_and_jump. */
6015 static void
6016 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6017 rtx_code_label *label)
6019 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6020 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6021 NULL, label, profile_probability::uninitialized ());