PR82045: Avoid passing machine modes through "..."
[official-gcc.git] / gcc / expmed.c
blob ff9eb5b2c71dcd005bd5186891d4b2b52548911f
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2017 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "predict.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "optabs.h"
34 #include "emit-rtl.h"
35 #include "diagnostic-core.h"
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "dojump.h"
39 #include "explow.h"
40 #include "expr.h"
41 #include "langhooks.h"
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
48 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 rtx, scalar_int_mode, bool);
54 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 rtx, scalar_int_mode, bool);
58 static void store_split_bit_field (rtx, opt_scalar_int_mode,
59 unsigned HOST_WIDE_INT,
60 unsigned HOST_WIDE_INT,
61 unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT,
63 rtx, scalar_int_mode, bool);
64 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
65 unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT, rtx, int, bool);
67 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
68 unsigned HOST_WIDE_INT,
69 unsigned HOST_WIDE_INT, rtx, int, bool);
70 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
71 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
72 unsigned HOST_WIDE_INT,
73 unsigned HOST_WIDE_INT, int, bool);
74 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
75 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
76 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
78 /* Return a constant integer mask value of mode MODE with BITSIZE ones
79 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
80 The mask is truncated if necessary to the width of mode MODE. The
81 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
83 static inline rtx
84 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
86 return immed_wide_int_const
87 (wi::shifted_mask (bitpos, bitsize, complement,
88 GET_MODE_PRECISION (mode)), mode);
91 /* Test whether a value is zero or a power of two. */
92 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
93 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
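/* As a worked example of the test above, x = 8 (binary 1000) gives
   8 & 7 == 0, so the test holds; x = 12 (binary 1100) gives 12 & 11 == 8,
   so it does not.  x = 0 also passes, hence the "OR_ZERO" in the name.  */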
95 struct init_expmed_rtl
97 rtx reg;
98 rtx plus;
99 rtx neg;
100 rtx mult;
101 rtx sdiv;
102 rtx udiv;
103 rtx sdiv_32;
104 rtx smod_32;
105 rtx wide_mult;
106 rtx wide_lshr;
107 rtx wide_trunc;
108 rtx shift;
109 rtx shift_mult;
110 rtx shift_add;
111 rtx shift_sub0;
112 rtx shift_sub1;
113 rtx zext;
114 rtx trunc;
116 rtx pow2[MAX_BITS_PER_WORD];
117 rtx cint[MAX_BITS_PER_WORD];
120 static void
121 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
122 scalar_int_mode from_mode, bool speed)
124 int to_size, from_size;
125 rtx which;
127 to_size = GET_MODE_PRECISION (to_mode);
128 from_size = GET_MODE_PRECISION (from_mode);
130 /* Most partial integers have a precision less than the "full"
131 integer they require for storage. If one doesn't, reduce its
132 bit size by one here, for comparison purposes only, so that it
133 still counts as narrower than the full mode. */
134 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
135 && pow2p_hwi (to_size))
136 to_size --;
137 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
138 && pow2p_hwi (from_size))
139 from_size --;
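/* As an illustration, a target-defined partial-integer mode whose
   precision is exactly 32 is treated as 31 bits here, so converting it
   to a full 32-bit mode is still costed as an extension rather than as
   a truncation.  */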
141 /* Assume cost of zero-extend and sign-extend is the same. */
142 which = (to_size < from_size ? all->trunc : all->zext);
144 PUT_MODE (all->reg, from_mode);
145 set_convert_cost (to_mode, from_mode, speed,
146 set_src_cost (which, to_mode, speed));
149 static void
150 init_expmed_one_mode (struct init_expmed_rtl *all,
151 machine_mode mode, int speed)
153 int m, n, mode_bitsize;
154 machine_mode mode_from;
156 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
158 PUT_MODE (all->reg, mode);
159 PUT_MODE (all->plus, mode);
160 PUT_MODE (all->neg, mode);
161 PUT_MODE (all->mult, mode);
162 PUT_MODE (all->sdiv, mode);
163 PUT_MODE (all->udiv, mode);
164 PUT_MODE (all->sdiv_32, mode);
165 PUT_MODE (all->smod_32, mode);
166 PUT_MODE (all->wide_trunc, mode);
167 PUT_MODE (all->shift, mode);
168 PUT_MODE (all->shift_mult, mode);
169 PUT_MODE (all->shift_add, mode);
170 PUT_MODE (all->shift_sub0, mode);
171 PUT_MODE (all->shift_sub1, mode);
172 PUT_MODE (all->zext, mode);
173 PUT_MODE (all->trunc, mode);
175 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
176 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
177 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
178 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
179 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
181 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
182 <= 2 * add_cost (speed, mode)));
183 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
184 <= 4 * add_cost (speed, mode)));
186 set_shift_cost (speed, mode, 0, 0);
188 int cost = add_cost (speed, mode);
189 set_shiftadd_cost (speed, mode, 0, cost);
190 set_shiftsub0_cost (speed, mode, 0, cost);
191 set_shiftsub1_cost (speed, mode, 0, cost);
194 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
195 for (m = 1; m < n; m++)
197 XEXP (all->shift, 1) = all->cint[m];
198 XEXP (all->shift_mult, 1) = all->pow2[m];
200 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
201 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
202 speed));
203 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
204 speed));
205 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
206 speed));
209 scalar_int_mode int_mode_to;
210 if (is_a <scalar_int_mode> (mode, &int_mode_to))
212 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
213 mode_from = (machine_mode)(mode_from + 1))
214 init_expmed_one_conv (all, int_mode_to,
215 as_a <scalar_int_mode> (mode_from), speed);
217 scalar_int_mode wider_mode;
218 if (GET_MODE_CLASS (int_mode_to) == MODE_INT
219 && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
221 PUT_MODE (all->zext, wider_mode);
222 PUT_MODE (all->wide_mult, wider_mode);
223 PUT_MODE (all->wide_lshr, wider_mode);
224 XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
226 set_mul_widen_cost (speed, wider_mode,
227 set_src_cost (all->wide_mult, wider_mode, speed));
228 set_mul_highpart_cost (speed, int_mode_to,
229 set_src_cost (all->wide_trunc,
230 int_mode_to, speed));
235 void
236 init_expmed (void)
238 struct init_expmed_rtl all;
239 machine_mode mode = QImode;
240 int m, speed;
242 memset (&all, 0, sizeof all);
243 for (m = 1; m < MAX_BITS_PER_WORD; m++)
245 all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
246 all.cint[m] = GEN_INT (m);
249 /* Avoid using hard regs in ways which may be unsupported. */
250 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
251 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
252 all.neg = gen_rtx_NEG (mode, all.reg);
253 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
254 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
255 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
256 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
257 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
258 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
259 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
260 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
261 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
262 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
263 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
264 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
265 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
266 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
267 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
269 for (speed = 0; speed < 2; speed++)
271 crtl->maybe_hot_insn_p = speed;
272 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
274 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
275 mode = (machine_mode)(mode + 1))
276 init_expmed_one_mode (&all, mode, speed);
278 if (MIN_MODE_PARTIAL_INT != VOIDmode)
279 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
280 mode = (machine_mode)(mode + 1))
281 init_expmed_one_mode (&all, mode, speed);
283 if (MIN_MODE_VECTOR_INT != VOIDmode)
284 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
285 mode = (machine_mode)(mode + 1))
286 init_expmed_one_mode (&all, mode, speed);
289 if (alg_hash_used_p ())
291 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
292 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
294 else
295 set_alg_hash_used_p (true);
296 default_rtl_profile ();
298 ggc_free (all.trunc);
299 ggc_free (all.shift_sub1);
300 ggc_free (all.shift_sub0);
301 ggc_free (all.shift_add);
302 ggc_free (all.shift_mult);
303 ggc_free (all.shift);
304 ggc_free (all.wide_trunc);
305 ggc_free (all.wide_lshr);
306 ggc_free (all.wide_mult);
307 ggc_free (all.zext);
308 ggc_free (all.smod_32);
309 ggc_free (all.sdiv_32);
310 ggc_free (all.udiv);
311 ggc_free (all.sdiv);
312 ggc_free (all.mult);
313 ggc_free (all.neg);
314 ggc_free (all.plus);
315 ggc_free (all.reg);
318 /* Return an rtx representing minus the value of X.
319 MODE is the intended mode of the result,
320 useful if X is a CONST_INT. */
322 rtx
323 negate_rtx (machine_mode mode, rtx x)
325 rtx result = simplify_unary_operation (NEG, mode, x, mode);
327 if (result == 0)
328 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
330 return result;
333 /* Whether reverse storage order is supported on the target. */
334 static int reverse_storage_order_supported = -1;
336 /* Check whether reverse storage order is supported on the target. */
338 static void
339 check_reverse_storage_order_support (void)
341 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
343 reverse_storage_order_supported = 0;
344 sorry ("reverse scalar storage order");
346 else
347 reverse_storage_order_supported = 1;
350 /* Whether reverse FP storage order is supported on the target. */
351 static int reverse_float_storage_order_supported = -1;
353 /* Check whether reverse FP storage order is supported on the target. */
355 static void
356 check_reverse_float_storage_order_support (void)
358 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
360 reverse_float_storage_order_supported = 0;
361 sorry ("reverse floating-point scalar storage order");
363 else
364 reverse_float_storage_order_supported = 1;
367 /* Return an rtx representing value of X with reverse storage order.
368 MODE is the intended mode of the result,
369 useful if X is a CONST_INT. */
371 rtx
372 flip_storage_order (machine_mode mode, rtx x)
374 scalar_int_mode int_mode;
375 rtx result;
377 if (mode == QImode)
378 return x;
380 if (COMPLEX_MODE_P (mode))
382 rtx real = read_complex_part (x, false);
383 rtx imag = read_complex_part (x, true);
385 real = flip_storage_order (GET_MODE_INNER (mode), real);
386 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
388 return gen_rtx_CONCAT (mode, real, imag);
391 if (__builtin_expect (reverse_storage_order_supported < 0, 0))
392 check_reverse_storage_order_support ();
394 if (!is_a <scalar_int_mode> (mode, &int_mode))
396 if (FLOAT_MODE_P (mode)
397 && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
398 check_reverse_float_storage_order_support ();
400 if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode))
402 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
403 return x;
405 x = gen_lowpart (int_mode, x);
408 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
409 if (result == 0)
410 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
412 if (int_mode != mode)
413 result = gen_lowpart (mode, result);
415 return result;
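/* As a worked example, flipping the storage order of a 32-bit integer
   holding 0x12345678 produces 0x78563412, i.e. a byte swap; non-integral
   modes are punned to an integer mode of the same precision first.  */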
418 /* If MODE is set, adjust bitfield memory MEM so that it points to the
419 first unit of mode MODE that contains a bitfield of size BITSIZE at
420 bit position BITNUM. If MODE is not set, return a BLKmode reference
421 to every byte in the bitfield. Set *NEW_BITNUM to the bit position
422 of the field within the new memory. */
424 static rtx
425 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
426 unsigned HOST_WIDE_INT bitsize,
427 unsigned HOST_WIDE_INT bitnum,
428 unsigned HOST_WIDE_INT *new_bitnum)
430 scalar_int_mode imode;
431 if (mode.exists (&imode))
433 unsigned int unit = GET_MODE_BITSIZE (imode);
434 *new_bitnum = bitnum % unit;
435 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
436 return adjust_bitfield_address (mem, imode, offset);
438 else
440 *new_bitnum = bitnum % BITS_PER_UNIT;
441 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
442 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
443 / BITS_PER_UNIT);
444 return adjust_bitfield_address_size (mem, BLKmode, offset, size);
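/* As a worked example, assuming BITS_PER_UNIT == 8 and a 32-bit MODE,
   a field at BITNUM == 37 gives *NEW_BITNUM == 5 and a byte offset of 4;
   in the BLKmode case the same BITNUM with BITSIZE == 12 gives
   *NEW_BITNUM == 5, offset 4 and a 3-byte reference.  */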
448 /* The caller wants to perform insertion or extraction PATTERN on a
449 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
450 BITREGION_START and BITREGION_END are as for store_bit_field
451 and FIELDMODE is the natural mode of the field.
453 Search for a mode that is compatible with the memory access
454 restrictions and (where applicable) with a register insertion or
455 extraction. Return the new memory on success, storing the adjusted
456 bit position in *NEW_BITNUM. Return null otherwise. */
458 static rtx
459 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
460 rtx op0, HOST_WIDE_INT bitsize,
461 HOST_WIDE_INT bitnum,
462 unsigned HOST_WIDE_INT bitregion_start,
463 unsigned HOST_WIDE_INT bitregion_end,
464 machine_mode fieldmode,
465 unsigned HOST_WIDE_INT *new_bitnum)
467 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
468 bitregion_end, MEM_ALIGN (op0),
469 MEM_VOLATILE_P (op0));
470 scalar_int_mode best_mode;
471 if (iter.next_mode (&best_mode))
473 /* We can use a memory in BEST_MODE. See whether this is true for
474 any wider modes. All other things being equal, we prefer to
475 use the widest mode possible because it tends to expose more
476 CSE opportunities. */
477 if (!iter.prefer_smaller_modes ())
479 /* Limit the search to the mode required by the corresponding
480 register insertion or extraction instruction, if any. */
481 scalar_int_mode limit_mode = word_mode;
482 extraction_insn insn;
483 if (get_best_reg_extraction_insn (&insn, pattern,
484 GET_MODE_BITSIZE (best_mode),
485 fieldmode))
486 limit_mode = insn.field_mode;
488 scalar_int_mode wider_mode;
489 while (iter.next_mode (&wider_mode)
490 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
491 best_mode = wider_mode;
493 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
494 new_bitnum);
496 return NULL_RTX;
499 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
500 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
501 offset is then BITNUM / BITS_PER_UNIT. */
503 static bool
504 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
505 unsigned HOST_WIDE_INT bitsize,
506 machine_mode struct_mode)
508 if (BYTES_BIG_ENDIAN)
509 return (bitnum % BITS_PER_UNIT == 0
510 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
511 || (bitnum + bitsize) % BITS_PER_WORD == 0));
512 else
513 return bitnum % BITS_PER_WORD == 0;
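/* As an example, in a 32-bit STRUCT_MODE with BITS_PER_WORD == 32, an
   8-bit field is a lowpart at BITNUM == 0 on little-endian targets and
   at BITNUM == 24 on big-endian targets.  */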
516 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
517 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
518 Return false if the access would touch memory outside the range
519 BITREGION_START to BITREGION_END for conformance to the C++ memory
520 model. */
522 static bool
523 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
524 unsigned HOST_WIDE_INT bitnum,
525 scalar_int_mode fieldmode,
526 unsigned HOST_WIDE_INT bitregion_start,
527 unsigned HOST_WIDE_INT bitregion_end)
529 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
531 /* -fstrict-volatile-bitfields must be enabled and we must have a
532 volatile MEM. */
533 if (!MEM_P (op0)
534 || !MEM_VOLATILE_P (op0)
535 || flag_strict_volatile_bitfields <= 0)
536 return false;
538 /* The bit size must not be larger than the field mode, and
539 the field mode must not be larger than a word. */
540 if (bitsize > modesize || modesize > BITS_PER_WORD)
541 return false;
543 /* Check for cases of unaligned fields that must be split. */
544 if (bitnum % modesize + bitsize > modesize)
545 return false;
547 /* The memory must be sufficiently aligned for a MODESIZE access.
548 This condition guarantees that the memory access will not
549 touch anything after the end of the structure. */
550 if (MEM_ALIGN (op0) < modesize)
551 return false;
553 /* Check for cases where the C++ memory model applies. */
554 if (bitregion_end != 0
555 && (bitnum - bitnum % modesize < bitregion_start
556 || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
557 return false;
559 return true;
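/* As an example of the memory model check above, with a 32-bit FIELDMODE
   an access at BITNUM == 40 covers bits 32 to 63; if BITREGION_START is
   48, the access would touch bits below the region, so we return false.  */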
562 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
563 bit number BITNUM can be treated as a simple value of mode MODE. */
565 static bool
566 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
567 unsigned HOST_WIDE_INT bitnum, machine_mode mode)
569 return (MEM_P (op0)
570 && bitnum % BITS_PER_UNIT == 0
571 && bitsize == GET_MODE_BITSIZE (mode)
572 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
573 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
574 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
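/* As an example, a 32-bit field starting at BITNUM == 64 in a MEM whose
   alignment is at least 32 bits satisfies these checks and can be read
   or written with a single move in MODE.  */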
577 /* Try to use instruction INSV to store VALUE into a field of OP0.
578 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
579 BLKmode MEM. VALUE_MODE is the mode of VALUE. BITSIZE and BITNUM
580 are as for store_bit_field. */
582 static bool
583 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
584 opt_scalar_int_mode op0_mode,
585 unsigned HOST_WIDE_INT bitsize,
586 unsigned HOST_WIDE_INT bitnum,
587 rtx value, scalar_int_mode value_mode)
589 struct expand_operand ops[4];
590 rtx value1;
591 rtx xop0 = op0;
592 rtx_insn *last = get_last_insn ();
593 bool copy_back = false;
595 scalar_int_mode op_mode = insv->field_mode;
596 unsigned int unit = GET_MODE_BITSIZE (op_mode);
597 if (bitsize == 0 || bitsize > unit)
598 return false;
600 if (MEM_P (xop0))
601 /* Get a reference to the first byte of the field. */
602 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
603 &bitnum);
604 else
606 /* Convert from counting within OP0 to counting in OP_MODE. */
607 if (BYTES_BIG_ENDIAN)
608 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
610 /* If xop0 is a register, we need it in OP_MODE
611 to make it acceptable to the format of insv. */
612 if (GET_CODE (xop0) == SUBREG)
613 /* We can't just change the mode, because this might clobber op0,
614 and we will need the original value of op0 if insv fails. */
615 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
616 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
617 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
620 /* If the destination is a paradoxical subreg such that we need a
621 truncate to the inner mode, perform the insertion on a temporary and
622 truncate the result to the original destination. Note that we can't
623 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
624 X) 0)) is (reg:N X). */
625 if (GET_CODE (xop0) == SUBREG
626 && REG_P (SUBREG_REG (xop0))
627 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
628 op_mode))
630 rtx tem = gen_reg_rtx (op_mode);
631 emit_move_insn (tem, xop0);
632 xop0 = tem;
633 copy_back = true;
636 /* There is a similar overflow check at the start of store_bit_field_1,
637 but that only handles the case where the field lies completely
638 outside the register. Here the field may lie partially in the
639 register, so we need to adjust BITSIZE for this partial overflow.
640 Without this fix, pr48335-2.c would be broken on big-endian
641 targets that support bit-insert instructions, such as arm and
642 aarch64. */
643 if (bitsize + bitnum > unit && bitnum < unit)
645 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
646 "destination object, data truncated into %wu-bit",
647 bitsize, unit - bitnum);
648 bitsize = unit - bitnum;
651 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
652 "backwards" from the size of the unit we are inserting into.
653 Otherwise, we count bits from the most significant on a
654 BYTES/BITS_BIG_ENDIAN machine. */
656 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
657 bitnum = unit - bitsize - bitnum;
659 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
660 value1 = value;
661 if (value_mode != op_mode)
663 if (GET_MODE_BITSIZE (value_mode) >= bitsize)
665 rtx tmp;
666 /* Optimization: Don't bother really extending VALUE
667 if it has all the bits we will actually use. However,
668 if we must narrow it, be sure we do it correctly. */
670 if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
672 tmp = simplify_subreg (op_mode, value1, value_mode, 0);
673 if (! tmp)
674 tmp = simplify_gen_subreg (op_mode,
675 force_reg (value_mode, value1),
676 value_mode, 0);
678 else
680 tmp = gen_lowpart_if_possible (op_mode, value1);
681 if (! tmp)
682 tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
684 value1 = tmp;
686 else if (CONST_INT_P (value))
687 value1 = gen_int_mode (INTVAL (value), op_mode);
688 else
689 /* Parse phase is supposed to make VALUE's data type
690 match that of the component reference, which is a type
691 at least as wide as the field; so VALUE should have
692 a mode that corresponds to that type. */
693 gcc_assert (CONSTANT_P (value));
696 create_fixed_operand (&ops[0], xop0);
697 create_integer_operand (&ops[1], bitsize);
698 create_integer_operand (&ops[2], bitnum);
699 create_input_operand (&ops[3], value1, op_mode);
700 if (maybe_expand_insn (insv->icode, 4, ops))
702 if (copy_back)
703 convert_move (op0, xop0, true);
704 return true;
706 delete_insns_since (last);
707 return false;
710 /* A subroutine of store_bit_field, with the same arguments. Return true
711 if the operation could be implemented.
713 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
714 no other way of implementing the operation. If FALLBACK_P is false,
715 return false instead. */
717 static bool
718 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
719 unsigned HOST_WIDE_INT bitnum,
720 unsigned HOST_WIDE_INT bitregion_start,
721 unsigned HOST_WIDE_INT bitregion_end,
722 machine_mode fieldmode,
723 rtx value, bool reverse, bool fallback_p)
725 rtx op0 = str_rtx;
726 rtx orig_value;
728 while (GET_CODE (op0) == SUBREG)
730 /* The following line once was done only if WORDS_BIG_ENDIAN,
731 but I think that is a mistake. WORDS_BIG_ENDIAN is
732 meaningful at a much higher level; when structures are copied
733 between memory and regs, the higher-numbered regs
734 always get higher addresses. */
735 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
736 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
737 int byte_offset = 0;
739 /* Paradoxical subregs need special handling on big-endian machines. */
740 if (paradoxical_subreg_p (op0))
742 int difference = inner_mode_size - outer_mode_size;
744 if (WORDS_BIG_ENDIAN)
745 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
746 if (BYTES_BIG_ENDIAN)
747 byte_offset += difference % UNITS_PER_WORD;
749 else
750 byte_offset = SUBREG_BYTE (op0);
752 bitnum += byte_offset * BITS_PER_UNIT;
753 op0 = SUBREG_REG (op0);
756 /* No action is needed if the target is a register and if the field
757 lies completely outside that register. This can occur if the source
758 code contains an out-of-bounds access to a small array. */
759 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
760 return true;
762 /* Use vec_set patterns for inserting parts of vectors whenever
763 available. */
764 machine_mode outermode = GET_MODE (op0);
765 scalar_mode innermode = GET_MODE_INNER (outermode);
766 if (VECTOR_MODE_P (outermode)
767 && !MEM_P (op0)
768 && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
769 && fieldmode == innermode
770 && bitsize == GET_MODE_BITSIZE (innermode)
771 && !(bitnum % GET_MODE_BITSIZE (innermode)))
773 struct expand_operand ops[3];
774 enum insn_code icode = optab_handler (vec_set_optab, outermode);
775 int pos = bitnum / GET_MODE_BITSIZE (innermode);
777 create_fixed_operand (&ops[0], op0);
778 create_input_operand (&ops[1], value, innermode);
779 create_integer_operand (&ops[2], pos);
780 if (maybe_expand_insn (icode, 3, ops))
781 return true;
784 /* If the target is a register, overwriting the entire object, or storing
785 a full-word or multi-word field can be done with just a SUBREG. */
786 if (!MEM_P (op0)
787 && bitsize == GET_MODE_BITSIZE (fieldmode)
788 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
789 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
791 /* Use the subreg machinery either to narrow OP0 to the required
792 words or to cope with mode punning between equal-sized modes.
793 In the latter case, use subreg on the rhs side, not lhs. */
794 rtx sub;
796 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
798 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
799 if (sub)
801 if (reverse)
802 sub = flip_storage_order (GET_MODE (op0), sub);
803 emit_move_insn (op0, sub);
804 return true;
807 else
809 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
810 bitnum / BITS_PER_UNIT);
811 if (sub)
813 if (reverse)
814 value = flip_storage_order (fieldmode, value);
815 emit_move_insn (sub, value);
816 return true;
821 /* If the target is memory, storing any naturally aligned field can be
822 done with a simple store. For targets that support fast unaligned
823 memory, any naturally sized, unit aligned field can be done directly. */
824 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
826 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
827 if (reverse)
828 value = flip_storage_order (fieldmode, value);
829 emit_move_insn (op0, value);
830 return true;
833 /* Make sure we are playing with integral modes. Pun with subregs
834 if we aren't. This must come after the entire register case above,
835 since that case is valid for any mode. The following cases are only
836 valid for integral modes. */
837 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
838 scalar_int_mode imode;
839 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
841 if (MEM_P (op0))
842 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
843 0, MEM_SIZE (op0));
844 else
845 op0 = gen_lowpart (op0_mode.require (), op0);
848 /* Storing an lsb-aligned field in a register
849 can be done with a movstrict instruction. */
851 if (!MEM_P (op0)
852 && !reverse
853 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
854 && bitsize == GET_MODE_BITSIZE (fieldmode)
855 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
857 struct expand_operand ops[2];
858 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
859 rtx arg0 = op0;
860 unsigned HOST_WIDE_INT subreg_off;
862 if (GET_CODE (arg0) == SUBREG)
864 /* Else we've got some float mode source being extracted into
865 a different float mode destination -- this combination of
866 subregs results in Severe Tire Damage. */
867 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
868 || GET_MODE_CLASS (fieldmode) == MODE_INT
869 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
870 arg0 = SUBREG_REG (arg0);
873 subreg_off = bitnum / BITS_PER_UNIT;
874 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
876 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
878 create_fixed_operand (&ops[0], arg0);
879 /* Shrink the source operand to FIELDMODE. */
880 create_convert_operand_to (&ops[1], value, fieldmode, false);
881 if (maybe_expand_insn (icode, 2, ops))
882 return true;
886 /* Handle fields bigger than a word. */
888 if (bitsize > BITS_PER_WORD)
890 /* Here we transfer the words of the field
891 in the order least significant first.
892 This is because the most significant word is the one which may
893 be less than full.
894 However, only do that if the value is not BLKmode. */
896 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
897 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
898 unsigned int i;
899 rtx_insn *last;
901 /* This is the mode we must force value to, so that there will be enough
902 subwords to extract. Note that fieldmode will often (always?) be
903 VOIDmode, because that is what store_field uses to indicate that this
904 is a bit field, but passing VOIDmode to operand_subword_force
905 is not allowed. */
906 fieldmode = GET_MODE (value);
907 if (fieldmode == VOIDmode)
908 fieldmode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
910 last = get_last_insn ();
911 for (i = 0; i < nwords; i++)
913 /* If I is 0, use the low-order word in both field and target;
914 if I is 1, use the next to lowest word; and so on. */
915 unsigned int wordnum = (backwards
916 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
917 - i - 1
918 : i);
919 unsigned int bit_offset = (backwards ^ reverse
920 ? MAX ((int) bitsize - ((int) i + 1)
921 * BITS_PER_WORD,
922 0)
923 : (int) i * BITS_PER_WORD);
924 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
925 unsigned HOST_WIDE_INT new_bitsize =
926 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
928 /* If the remaining chunk doesn't have full wordsize we have
929 to make sure that for big-endian machines the higher order
930 bits are used. */
931 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
932 value_word = simplify_expand_binop (word_mode, lshr_optab,
933 value_word,
934 GEN_INT (BITS_PER_WORD
935 - new_bitsize),
936 NULL_RTX, true,
937 OPTAB_LIB_WIDEN);
939 if (!store_bit_field_1 (op0, new_bitsize,
940 bitnum + bit_offset,
941 bitregion_start, bitregion_end,
942 word_mode,
943 value_word, reverse, fallback_p))
945 delete_insns_since (last);
946 return false;
949 return true;
952 /* If VALUE has a floating-point or complex mode, access it as an
953 integer of the corresponding size. This can occur on a machine
954 with 64 bit registers that uses SFmode for float. It can also
955 occur for unaligned float or complex fields. */
956 orig_value = value;
957 scalar_int_mode value_mode;
958 if (GET_MODE (value) == VOIDmode)
959 /* By this point we've dealt with values that are bigger than a word,
960 so word_mode is a conservatively correct choice. */
961 value_mode = word_mode;
962 else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
964 value_mode = int_mode_for_mode (GET_MODE (value)).require ();
965 value = gen_reg_rtx (value_mode);
966 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
969 /* If OP0 is a multi-word register, narrow it to the affected word.
970 If the region spans two words, defer to store_split_bit_field.
971 Don't do this if op0 is a single hard register wider than word
972 such as a float or vector register. */
973 if (!MEM_P (op0)
974 && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
975 && (!REG_P (op0)
976 || !HARD_REGISTER_P (op0)
977 || HARD_REGNO_NREGS (REGNO (op0), op0_mode.require ()) != 1))
979 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
981 if (!fallback_p)
982 return false;
984 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
985 bitregion_start, bitregion_end,
986 value, value_mode, reverse);
987 return true;
989 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
990 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
991 gcc_assert (op0);
992 op0_mode = word_mode;
993 bitnum %= BITS_PER_WORD;
996 /* From here on we can assume that the field to be stored in fits
997 within a word. If the destination is a register, it too fits
998 in a word. */
1000 extraction_insn insv;
1001 if (!MEM_P (op0)
1002 && !reverse
1003 && get_best_reg_extraction_insn (&insv, EP_insv,
1004 GET_MODE_BITSIZE (op0_mode.require ()),
1005 fieldmode)
1006 && store_bit_field_using_insv (&insv, op0, op0_mode,
1007 bitsize, bitnum, value, value_mode))
1008 return true;
1010 /* If OP0 is a memory, try copying it to a register and seeing if a
1011 cheap register alternative is available. */
1012 if (MEM_P (op0) && !reverse)
1014 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1015 fieldmode)
1016 && store_bit_field_using_insv (&insv, op0, op0_mode,
1017 bitsize, bitnum, value, value_mode))
1018 return true;
1020 rtx_insn *last = get_last_insn ();
1022 /* Try loading part of OP0 into a register, inserting the bitfield
1023 into that, and then copying the result back to OP0. */
1024 unsigned HOST_WIDE_INT bitpos;
1025 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1026 bitregion_start, bitregion_end,
1027 fieldmode, &bitpos);
1028 if (xop0)
1030 rtx tempreg = copy_to_reg (xop0);
1031 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1032 bitregion_start, bitregion_end,
1033 fieldmode, orig_value, reverse, false))
1035 emit_move_insn (xop0, tempreg);
1036 return true;
1038 delete_insns_since (last);
1042 if (!fallback_p)
1043 return false;
1045 store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1046 bitregion_end, value, value_mode, reverse);
1047 return true;
1050 /* Generate code to store value from rtx VALUE
1051 into a bit-field within structure STR_RTX
1052 containing BITSIZE bits starting at bit BITNUM.
1054 BITREGION_START is bitpos of the first bitfield in this region.
1055 BITREGION_END is the bitpos of the ending bitfield in this region.
1056 These two fields are 0 if the C++ memory model does not apply,
1057 or we are not interested in keeping track of bitfield regions.
1059 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1061 If REVERSE is true, the store is to be done in reverse order. */
1063 void
1064 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1065 unsigned HOST_WIDE_INT bitnum,
1066 unsigned HOST_WIDE_INT bitregion_start,
1067 unsigned HOST_WIDE_INT bitregion_end,
1068 machine_mode fieldmode,
1069 rtx value, bool reverse)
1071 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1072 scalar_int_mode int_mode;
1073 if (is_a <scalar_int_mode> (fieldmode, &int_mode)
1074 && strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, int_mode,
1075 bitregion_start, bitregion_end))
1077 /* Storing of a full word can be done with a simple store.
1078 We know here that the field can be accessed with one single
1079 instruction. For targets that support unaligned memory,
1080 an unaligned access may be necessary. */
1081 if (bitsize == GET_MODE_BITSIZE (int_mode))
1083 str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1084 bitnum / BITS_PER_UNIT);
1085 if (reverse)
1086 value = flip_storage_order (int_mode, value);
1087 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1088 emit_move_insn (str_rtx, value);
1090 else
1092 rtx temp;
1094 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, bitsize, bitnum,
1095 &bitnum);
1096 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (int_mode));
1097 temp = copy_to_reg (str_rtx);
1098 if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1099 int_mode, value, reverse, true))
1100 gcc_unreachable ();
1102 emit_move_insn (str_rtx, temp);
1105 return;
1108 /* Under the C++0x memory model, we must not touch bits outside the
1109 bit region. Adjust the address to start at the beginning of the
1110 bit region. */
1111 if (MEM_P (str_rtx) && bitregion_start > 0)
1113 scalar_int_mode best_mode;
1114 machine_mode addr_mode = VOIDmode;
1115 HOST_WIDE_INT offset, size;
1117 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1119 offset = bitregion_start / BITS_PER_UNIT;
1120 bitnum -= bitregion_start;
1121 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1122 bitregion_end -= bitregion_start;
1123 bitregion_start = 0;
1124 if (get_best_mode (bitsize, bitnum,
1125 bitregion_start, bitregion_end,
1126 MEM_ALIGN (str_rtx), INT_MAX,
1127 MEM_VOLATILE_P (str_rtx), &best_mode))
1128 addr_mode = best_mode;
1129 str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1130 offset, size);
1133 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1134 bitregion_start, bitregion_end,
1135 fieldmode, value, reverse, true))
1136 gcc_unreachable ();
1139 /* Use shifts and boolean operations to store VALUE into a bit field of
1140 width BITSIZE in OP0, starting at bit BITNUM. If OP0_MODE is defined,
1141 it is the mode of OP0, otherwise OP0 is a BLKmode MEM. VALUE_MODE is
1142 the mode of VALUE.
1144 If REVERSE is true, the store is to be done in reverse order. */
1146 static void
1147 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1148 unsigned HOST_WIDE_INT bitsize,
1149 unsigned HOST_WIDE_INT bitnum,
1150 unsigned HOST_WIDE_INT bitregion_start,
1151 unsigned HOST_WIDE_INT bitregion_end,
1152 rtx value, scalar_int_mode value_mode, bool reverse)
1154 /* There is a case not handled here:
1155 a structure with a known alignment of just a halfword
1156 and a field split across two aligned halfwords within the structure.
1157 Or likewise a structure with a known alignment of just a byte
1158 and a field split across two bytes.
1159 Such cases are not supposed to be able to occur. */
1161 scalar_int_mode best_mode;
1162 if (MEM_P (op0))
1164 unsigned int max_bitsize = BITS_PER_WORD;
1165 scalar_int_mode imode;
1166 if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1167 max_bitsize = GET_MODE_BITSIZE (imode);
1169 if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1170 MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1171 &best_mode))
1173 /* The only way this should occur is if the field spans word
1174 boundaries. */
1175 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1176 bitregion_start, bitregion_end,
1177 value, value_mode, reverse);
1178 return;
1181 op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1183 else
1184 best_mode = op0_mode.require ();
1186 store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1187 value, value_mode, reverse);
1190 /* Helper function for store_fixed_bit_field, stores
1191 the bit field always using MODE, which is the mode of OP0. The other
1192 arguments are as for store_fixed_bit_field. */
1194 static void
1195 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1196 unsigned HOST_WIDE_INT bitsize,
1197 unsigned HOST_WIDE_INT bitnum,
1198 rtx value, scalar_int_mode value_mode, bool reverse)
1200 rtx temp;
1201 int all_zero = 0;
1202 int all_one = 0;
1204 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1205 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1207 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1208 /* BITNUM is the distance between our msb
1209 and that of the containing datum.
1210 Convert it to the distance from the lsb. */
1211 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1213 /* Now BITNUM is always the distance between our lsb
1214 and that of OP0. */
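/* As a worked example, in a 32-bit MODE a field of BITSIZE 8 whose
   msb-relative BITNUM was 4 now has lsb-relative BITNUM 32 - 8 - 4 == 20.  */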
1216 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1217 we must first convert its mode to MODE. */
1219 if (CONST_INT_P (value))
1221 unsigned HOST_WIDE_INT v = UINTVAL (value);
1223 if (bitsize < HOST_BITS_PER_WIDE_INT)
1224 v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1226 if (v == 0)
1227 all_zero = 1;
1228 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1229 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1230 || (bitsize == HOST_BITS_PER_WIDE_INT
1231 && v == HOST_WIDE_INT_M1U))
1232 all_one = 1;
1234 value = lshift_value (mode, v, bitnum);
1236 else
1238 int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1239 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1241 if (value_mode != mode)
1242 value = convert_to_mode (mode, value, 1);
1244 if (must_and)
1245 value = expand_binop (mode, and_optab, value,
1246 mask_rtx (mode, 0, bitsize, 0),
1247 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1248 if (bitnum > 0)
1249 value = expand_shift (LSHIFT_EXPR, mode, value,
1250 bitnum, NULL_RTX, 1);
1253 if (reverse)
1254 value = flip_storage_order (mode, value);
1256 /* Now clear the chosen bits in OP0,
1257 except that if VALUE is -1 we need not bother. */
1258 /* We keep the intermediates in registers to allow CSE to combine
1259 consecutive bitfield assignments. */
1261 temp = force_reg (mode, op0);
1263 if (! all_one)
1265 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1266 if (reverse)
1267 mask = flip_storage_order (mode, mask);
1268 temp = expand_binop (mode, and_optab, temp, mask,
1269 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1270 temp = force_reg (mode, temp);
1273 /* Now logical-or VALUE into OP0, unless it is zero. */
1275 if (! all_zero)
1277 temp = expand_binop (mode, ior_optab, temp, value,
1278 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1279 temp = force_reg (mode, temp);
1282 if (op0 != temp)
1284 op0 = copy_rtx (op0);
1285 emit_move_insn (op0, temp);
1289 /* Store a bit field that is split across multiple accessible memory objects.
1291 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1292 BITSIZE is the field width; BITPOS the position of its first bit
1293 (within the word).
1294 VALUE is the value to store, which has mode VALUE_MODE.
1295 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1296 a BLKmode MEM.
1298 If REVERSE is true, the store is to be done in reverse order.
1300 This does not yet handle fields wider than BITS_PER_WORD. */
1302 static void
1303 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1304 unsigned HOST_WIDE_INT bitsize,
1305 unsigned HOST_WIDE_INT bitpos,
1306 unsigned HOST_WIDE_INT bitregion_start,
1307 unsigned HOST_WIDE_INT bitregion_end,
1308 rtx value, scalar_int_mode value_mode, bool reverse)
1310 unsigned int unit, total_bits, bitsdone = 0;
1312 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1313 much at a time. */
1314 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1315 unit = BITS_PER_WORD;
1316 else
1317 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1319 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1320 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1321 again, and we will mutually recurse forever. */
1322 if (MEM_P (op0) && op0_mode.exists ())
1323 unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1325 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1326 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1327 that VALUE might be a floating-point constant. */
1328 if (CONSTANT_P (value) && !CONST_INT_P (value))
1330 rtx word = gen_lowpart_common (word_mode, value);
1332 if (word && (value != word))
1333 value = word;
1334 else
1335 value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1336 value_mode = word_mode;
1339 total_bits = GET_MODE_BITSIZE (value_mode);
1341 while (bitsdone < bitsize)
1343 unsigned HOST_WIDE_INT thissize;
1344 unsigned HOST_WIDE_INT thispos;
1345 unsigned HOST_WIDE_INT offset;
1346 rtx part;
1348 offset = (bitpos + bitsdone) / unit;
1349 thispos = (bitpos + bitsdone) % unit;
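/* As a worked example, with UNIT == 8, BITPOS == 13 and BITSDONE == 0,
   the first piece lands in unit OFFSET == 1 at bit THISPOS == 5 within
   that unit.  */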
1351 /* When the region of bytes we can touch is restricted, decrease
1352 UNIT close to the end of the region as needed. If op0 is a REG
1353 or SUBREG of REG, don't do this, as there can't be data races
1354 on a register and we can expand shorter code in some cases. */
1355 if (bitregion_end
1356 && unit > BITS_PER_UNIT
1357 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1358 && !REG_P (op0)
1359 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1361 unit = unit / 2;
1362 continue;
1365 /* THISSIZE must not overrun a word boundary. Otherwise,
1366 store_fixed_bit_field will call us again, and we will mutually
1367 recurse forever. */
1368 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1369 thissize = MIN (thissize, unit - thispos);
1371 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1373 /* Fetch successively less significant portions. */
1374 if (CONST_INT_P (value))
1375 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1376 >> (bitsize - bitsdone - thissize))
1377 & ((HOST_WIDE_INT_1 << thissize) - 1));
1378 /* Likewise, but the source is little-endian. */
1379 else if (reverse)
1380 part = extract_fixed_bit_field (word_mode, value, value_mode,
1381 thissize,
1382 bitsize - bitsdone - thissize,
1383 NULL_RTX, 1, false);
1384 else
1385 /* The args are chosen so that the last part includes the
1386 lsb. Give extract_bit_field the value it needs (with
1387 endianness compensation) to fetch the piece we want. */
1388 part = extract_fixed_bit_field (word_mode, value, value_mode,
1389 thissize,
1390 total_bits - bitsize + bitsdone,
1391 NULL_RTX, 1, false);
1393 else
1395 /* Fetch successively more significant portions. */
1396 if (CONST_INT_P (value))
1397 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1398 >> bitsdone)
1399 & ((HOST_WIDE_INT_1 << thissize) - 1));
1400 /* Likewise, but the source is big-endian. */
1401 else if (reverse)
1402 part = extract_fixed_bit_field (word_mode, value, value_mode,
1403 thissize,
1404 total_bits - bitsdone - thissize,
1405 NULL_RTX, 1, false);
1406 else
1407 part = extract_fixed_bit_field (word_mode, value, value_mode,
1408 thissize, bitsdone, NULL_RTX,
1409 1, false);
1412 /* If OP0 is a register, then handle OFFSET here. */
1413 rtx op0_piece = op0;
1414 opt_scalar_int_mode op0_piece_mode = op0_mode;
1415 if (SUBREG_P (op0) || REG_P (op0))
1417 scalar_int_mode imode;
1418 if (op0_mode.exists (&imode)
1419 && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1421 if (offset)
1422 op0_piece = const0_rtx;
1424 else
1426 op0_piece = operand_subword_force (op0,
1427 offset * unit / BITS_PER_WORD,
1428 GET_MODE (op0));
1429 op0_piece_mode = word_mode;
1431 offset &= BITS_PER_WORD / unit - 1;
1434 /* OFFSET is in UNITs, and UNIT is in bits. If OP0_PIECE is const0_rtx,
1435 it is just an out-of-bounds access. Ignore it. */
1436 if (op0_piece != const0_rtx)
1437 store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1438 offset * unit + thispos, bitregion_start,
1439 bitregion_end, part, word_mode, reverse);
1440 bitsdone += thissize;
1444 /* A subroutine of extract_bit_field_1 that converts return value X
1445 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1446 to extract_bit_field. */
1448 static rtx
1449 convert_extracted_bit_field (rtx x, machine_mode mode,
1450 machine_mode tmode, bool unsignedp)
1452 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1453 return x;
1455 /* If the x mode is not a scalar integral, first convert to the
1456 integer mode of that size and then access it as a floating-point
1457 value via a SUBREG. */
1458 if (!SCALAR_INT_MODE_P (tmode))
1460 scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1461 x = convert_to_mode (int_mode, x, unsignedp);
1462 x = force_reg (int_mode, x);
1463 return gen_lowpart (tmode, x);
1466 return convert_to_mode (tmode, x, unsignedp);
1469 /* Try to use an ext(z)v pattern to extract a field from OP0.
1470 Return the extracted value on success, otherwise return null.
1471 EXTV describes the extraction instruction to use. If OP0_MODE
1472 is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1473 The other arguments are as for extract_bit_field. */
1475 static rtx
1476 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1477 opt_scalar_int_mode op0_mode,
1478 unsigned HOST_WIDE_INT bitsize,
1479 unsigned HOST_WIDE_INT bitnum,
1480 int unsignedp, rtx target,
1481 machine_mode mode, machine_mode tmode)
1483 struct expand_operand ops[4];
1484 rtx spec_target = target;
1485 rtx spec_target_subreg = 0;
1486 scalar_int_mode ext_mode = extv->field_mode;
1487 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1489 if (bitsize == 0 || unit < bitsize)
1490 return NULL_RTX;
1492 if (MEM_P (op0))
1493 /* Get a reference to the first byte of the field. */
1494 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1495 &bitnum);
1496 else
1498 /* Convert from counting within OP0 to counting in EXT_MODE. */
1499 if (BYTES_BIG_ENDIAN)
1500 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1502 /* If op0 is a register, we need it in EXT_MODE to make it
1503 acceptable to the format of ext(z)v. */
1504 if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1505 return NULL_RTX;
1506 if (REG_P (op0) && op0_mode.require () != ext_mode)
1507 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1510 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1511 "backwards" from the size of the unit we are extracting from.
1512 Otherwise, we count bits from the most significant on a
1513 BYTES/BITS_BIG_ENDIAN machine. */
1515 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1516 bitnum = unit - bitsize - bitnum;
1518 if (target == 0)
1519 target = spec_target = gen_reg_rtx (tmode);
1521 if (GET_MODE (target) != ext_mode)
1523 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1524 between the mode of the extraction (word_mode) and the target
1525 mode. Instead, create a temporary and use convert_move to set
1526 the target. */
1527 if (REG_P (target)
1528 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1530 target = gen_lowpart (ext_mode, target);
1531 if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1532 spec_target_subreg = target;
1534 else
1535 target = gen_reg_rtx (ext_mode);
1538 create_output_operand (&ops[0], target, ext_mode);
1539 create_fixed_operand (&ops[1], op0);
1540 create_integer_operand (&ops[2], bitsize);
1541 create_integer_operand (&ops[3], bitnum);
1542 if (maybe_expand_insn (extv->icode, 4, ops))
1544 target = ops[0].value;
1545 if (target == spec_target)
1546 return target;
1547 if (target == spec_target_subreg)
1548 return spec_target;
1549 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1551 return NULL_RTX;
1554 /* A subroutine of extract_bit_field, with the same arguments.
1555 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1556 if we can find no other means of implementing the operation.
1557 If FALLBACK_P is false, return NULL instead. */
1559 static rtx
1560 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1561 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1562 machine_mode mode, machine_mode tmode,
1563 bool reverse, bool fallback_p, rtx *alt_rtl)
1565 rtx op0 = str_rtx;
1566 machine_mode mode1;
1568 if (tmode == VOIDmode)
1569 tmode = mode;
1571 while (GET_CODE (op0) == SUBREG)
1573 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1574 op0 = SUBREG_REG (op0);
1577 /* If we have an out-of-bounds access to a register, just return an
1578 uninitialized register of the required mode. This can occur if the
1579 source code contains an out-of-bounds access to a small array. */
1580 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1581 return gen_reg_rtx (tmode);
1583 if (REG_P (op0)
1584 && mode == GET_MODE (op0)
1585 && bitnum == 0
1586 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1588 if (reverse)
1589 op0 = flip_storage_order (mode, op0);
1590 /* We're trying to extract a full register from itself. */
1591 return op0;
1594 /* First try to check for vector from vector extractions. */
1595 if (VECTOR_MODE_P (GET_MODE (op0))
1596 && !MEM_P (op0)
1597 && VECTOR_MODE_P (tmode)
1598 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (tmode))
1600 machine_mode new_mode = GET_MODE (op0);
1601 if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1603 new_mode = mode_for_vector (GET_MODE_INNER (tmode),
1604 GET_MODE_BITSIZE (GET_MODE (op0))
1605 / GET_MODE_UNIT_BITSIZE (tmode));
1606 if (!VECTOR_MODE_P (new_mode)
1607 || GET_MODE_SIZE (new_mode) != GET_MODE_SIZE (GET_MODE (op0))
1608 || GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)
1609 || !targetm.vector_mode_supported_p (new_mode))
1610 new_mode = VOIDmode;
1612 if (new_mode != VOIDmode
1613 && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1614 != CODE_FOR_nothing)
1615 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (tmode)
1616 == bitnum / GET_MODE_BITSIZE (tmode)))
1618 struct expand_operand ops[3];
1619 machine_mode outermode = new_mode;
1620 machine_mode innermode = tmode;
1621 enum insn_code icode
1622 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1623 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1625 if (new_mode != GET_MODE (op0))
1626 op0 = gen_lowpart (new_mode, op0);
1627 create_output_operand (&ops[0], target, innermode);
1628 ops[0].target = 1;
1629 create_input_operand (&ops[1], op0, outermode);
1630 create_integer_operand (&ops[2], pos);
1631 if (maybe_expand_insn (icode, 3, ops))
1633 if (alt_rtl && ops[0].target)
1634 *alt_rtl = target;
1635 target = ops[0].value;
1636 if (GET_MODE (target) != mode)
1637 return gen_lowpart (tmode, target);
1638 return target;
1643 /* See if we can get a better vector mode before extracting. */
1644 if (VECTOR_MODE_P (GET_MODE (op0))
1645 && !MEM_P (op0)
1646 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1648 machine_mode new_mode;
1650 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1651 new_mode = MIN_MODE_VECTOR_FLOAT;
1652 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1653 new_mode = MIN_MODE_VECTOR_FRACT;
1654 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1655 new_mode = MIN_MODE_VECTOR_UFRACT;
1656 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1657 new_mode = MIN_MODE_VECTOR_ACCUM;
1658 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1659 new_mode = MIN_MODE_VECTOR_UACCUM;
1660 else
1661 new_mode = MIN_MODE_VECTOR_INT;
1663 FOR_EACH_MODE_FROM (new_mode, new_mode)
1664 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1665 && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode)
1666 && targetm.vector_mode_supported_p (new_mode))
1667 break;
1668 if (new_mode != VOIDmode)
1669 op0 = gen_lowpart (new_mode, op0);
1672 /* Use vec_extract patterns for extracting parts of vectors whenever
1673 available. */
1674 machine_mode outermode = GET_MODE (op0);
1675 scalar_mode innermode = GET_MODE_INNER (outermode);
1676 if (VECTOR_MODE_P (outermode)
1677 && !MEM_P (op0)
1678 && (convert_optab_handler (vec_extract_optab, outermode, innermode)
1679 != CODE_FOR_nothing)
1680 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (innermode)
1681 == bitnum / GET_MODE_BITSIZE (innermode)))
1683 struct expand_operand ops[3];
1684 enum insn_code icode
1685 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1686 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1688 create_output_operand (&ops[0], target, innermode);
1689 ops[0].target = 1;
1690 create_input_operand (&ops[1], op0, outermode);
1691 create_integer_operand (&ops[2], pos);
1692 if (maybe_expand_insn (icode, 3, ops))
1694 if (alt_rtl && ops[0].target)
1695 *alt_rtl = target;
1696 target = ops[0].value;
1697 if (GET_MODE (target) != mode)
1698 return gen_lowpart (tmode, target);
1699 return target;
1703 /* Make sure we are playing with integral modes. Pun with subregs
1704 if we aren't. */
1705 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1706 scalar_int_mode imode;
1707 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1709 if (MEM_P (op0))
1710 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1711 0, MEM_SIZE (op0));
1712 else if (op0_mode.exists (&imode))
1714 op0 = gen_lowpart (imode, op0);
1716 /* If we got a SUBREG, force it into a register since we
1717 aren't going to be able to do another SUBREG on it. */
1718 if (GET_CODE (op0) == SUBREG)
1719 op0 = force_reg (imode, op0);
1721 else
1723 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1724 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1725 emit_move_insn (mem, op0);
1726 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1730 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1731 If that's wrong, the solution is to test for it and set TARGET to 0
1732 if needed. */
1734 /* Get the mode of the field to use for atomic access or subreg
1735 conversion. */
1736 mode1 = mode;
1737 if (SCALAR_INT_MODE_P (tmode))
1739 machine_mode try_mode = mode_for_size (bitsize,
1740 GET_MODE_CLASS (tmode), 0);
1741 if (try_mode != BLKmode)
1742 mode1 = try_mode;
1744 gcc_assert (mode1 != BLKmode);
1746 /* Extraction of a full MODE1 value can be done with a subreg as long
1747 as the least significant bit of the value is the least significant
1748 bit of either OP0 or a word of OP0. */
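/* Illustrative example (not from the original source): on a little-endian
   target the low 32 bits of a DImode pseudo can be taken directly as
   (subreg:SI (reg:DI ...) 0), with no shift or mask needed.  */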
1749 if (!MEM_P (op0)
1750 && !reverse
1751 && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
1752 && bitsize == GET_MODE_BITSIZE (mode1)
1753 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, op0_mode.require ()))
1755 rtx sub = simplify_gen_subreg (mode1, op0, op0_mode.require (),
1756 bitnum / BITS_PER_UNIT);
1757 if (sub)
1758 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1761 /* Extraction of a full MODE1 value can be done with a load as long as
1762 the field is on a byte boundary and is sufficiently aligned. */
1763 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1765 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1766 if (reverse)
1767 op0 = flip_storage_order (mode1, op0);
1768 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1771 /* Handle fields bigger than a word. */
1773 if (bitsize > BITS_PER_WORD)
1775 /* Here we transfer the words of the field
1776 in the order least significant first.
1777 This is because the most significant word is the one which may
1778 be less than full. */
1780 const bool backwards = WORDS_BIG_ENDIAN;
1781 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1782 unsigned int i;
1783 rtx_insn *last;
1785 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1786 target = gen_reg_rtx (mode);
1788 /* In case we're about to clobber a base register or something
1789 (see gcc.c-torture/execute/20040625-1.c). */
1790 if (reg_mentioned_p (target, str_rtx))
1791 target = gen_reg_rtx (mode);
1793 /* Indicate for flow that the entire target reg is being set. */
1794 emit_clobber (target);
1796 last = get_last_insn ();
1797 for (i = 0; i < nwords; i++)
1799 /* If I is 0, use the low-order word in both field and target;
1800 if I is 1, use the next to lowest word; and so on. */
1801 /* Word number in TARGET to use. */
1802 unsigned int wordnum
1803 = (backwards
1804 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1805 : i);
1806 /* Offset from start of field in OP0. */
1807 unsigned int bit_offset = (backwards ^ reverse
1808 ? MAX ((int) bitsize - ((int) i + 1)
1809 * BITS_PER_WORD,
1810 0)
1811 : (int) i * BITS_PER_WORD);
1812 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1813 rtx result_part
1814 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1815 bitsize - i * BITS_PER_WORD),
1816 bitnum + bit_offset, 1, target_part,
1817 mode, word_mode, reverse, fallback_p, NULL);
1819 gcc_assert (target_part);
1820 if (!result_part)
1822 delete_insns_since (last);
1823 return NULL;
1826 if (result_part != target_part)
1827 emit_move_insn (target_part, result_part);
1830 if (unsignedp)
1832 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1833 need to be zero'd out. */
1834 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1836 unsigned int i, total_words;
1838 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1839 for (i = nwords; i < total_words; i++)
1840 emit_move_insn
1841 (operand_subword (target,
1842 backwards ? total_words - i - 1 : i,
1843 1, VOIDmode),
1844 const0_rtx);
1846 return target;
1849 /* Signed bit field: sign-extend with two arithmetic shifts. */
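/* For instance (illustrative), a 48-bit signed field sitting in the low
   bits of a 64-bit TARGET is sign-extended by shifting left by 16 and
   then arithmetic-shifting right by 16.  */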
1850 target = expand_shift (LSHIFT_EXPR, mode, target,
1851 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1852 return expand_shift (RSHIFT_EXPR, mode, target,
1853 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1856 /* If OP0 is a multi-word register, narrow it to the affected word.
1857 If the region spans two words, defer to extract_split_bit_field. */
1858 if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1860 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1862 if (!fallback_p)
1863 return NULL_RTX;
1864 target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1865 unsignedp, reverse);
1866 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1868 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1869 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1870 op0_mode = word_mode;
1871 bitnum %= BITS_PER_WORD;
1874 /* From here on we know the desired field is smaller than a word.
1875 If OP0 is a register, it too fits within a word. */
1876 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1877 extraction_insn extv;
1878 if (!MEM_P (op0)
1879 && !reverse
1880 /* ??? We could limit the structure size to the part of OP0 that
1881 contains the field, with appropriate checks for endianness
1882 and TRULY_NOOP_TRUNCATION. */
1883 && get_best_reg_extraction_insn (&extv, pattern,
1884 GET_MODE_BITSIZE (op0_mode.require ()),
1885 tmode))
1887 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1888 bitsize, bitnum,
1889 unsignedp, target, mode,
1890 tmode);
1891 if (result)
1892 return result;
1895 /* If OP0 is a memory, try copying it to a register and seeing if a
1896 cheap register alternative is available. */
1897 if (MEM_P (op0) && !reverse)
1899 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1900 tmode))
1902 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1903 bitsize, bitnum,
1904 unsignedp, target, mode,
1905 tmode);
1906 if (result)
1907 return result;
1910 rtx_insn *last = get_last_insn ();
1912 /* Try loading part of OP0 into a register and extracting the
1913 bitfield from that. */
1914 unsigned HOST_WIDE_INT bitpos;
1915 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1916 0, 0, tmode, &bitpos);
1917 if (xop0)
1919 xop0 = copy_to_reg (xop0);
1920 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1921 unsignedp, target,
1922 mode, tmode, reverse, false, NULL);
1923 if (result)
1924 return result;
1925 delete_insns_since (last);
1929 if (!fallback_p)
1930 return NULL;
1932 /* Find a correspondingly-sized integer field, so we can apply
1933 shifts and masks to it. */
1934 scalar_int_mode int_mode;
1935 if (!int_mode_for_mode (tmode).exists (&int_mode))
1936 /* If this fails, we should probably push op0 out to memory and then
1937 do a load. */
1938 int_mode = int_mode_for_mode (mode).require ();
1940 target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
1941 bitnum, target, unsignedp, reverse);
1943 /* Complex values must be reversed piecewise, so we need to undo the global
1944 reversal, convert to the complex mode and reverse again. */
1945 if (reverse && COMPLEX_MODE_P (tmode))
1947 target = flip_storage_order (int_mode, target);
1948 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1949 target = flip_storage_order (tmode, target);
1951 else
1952 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1954 return target;
1957 /* Generate code to extract a byte-field from STR_RTX
1958 containing BITSIZE bits, starting at BITNUM,
1959 and put it in TARGET if possible (if TARGET is nonzero).
1960 Regardless of TARGET, we return the rtx for where the value is placed.
1962 STR_RTX is the structure containing the byte (a REG or MEM).
1963 UNSIGNEDP is nonzero if this is an unsigned bit field.
1964 MODE is the natural mode of the field value once extracted.
1965 TMODE is the mode the caller would like the value to have;
1966 but the value may be returned with type MODE instead.
1968 If REVERSE is true, the extraction is to be done in reverse order.
1970 If a TARGET is specified and we can store in it at no extra cost,
1971 we do so, and return TARGET.
1972 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1973 if they are equally easy. */
1975 rtx
1976 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1977 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1978 machine_mode mode, machine_mode tmode, bool reverse,
1979 rtx *alt_rtl)
1981 machine_mode mode1;
1983 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1984 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1985 mode1 = GET_MODE (str_rtx);
1986 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1987 mode1 = GET_MODE (target);
1988 else
1989 mode1 = tmode;
1991 scalar_int_mode int_mode;
1992 if (is_a <scalar_int_mode> (mode1, &int_mode)
1993 && strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, int_mode, 0, 0))
1995 /* Extraction of a full INT_MODE value can be done with a simple load.
1996 We know here that the field can be accessed with one single
1997 instruction. For targets that support unaligned memory,
1998 an unaligned access may be necessary. */
1999 if (bitsize == GET_MODE_BITSIZE (int_mode))
2001 rtx result = adjust_bitfield_address (str_rtx, int_mode,
2002 bitnum / BITS_PER_UNIT);
2003 if (reverse)
2004 result = flip_storage_order (int_mode, result);
2005 gcc_assert (bitnum % BITS_PER_UNIT == 0);
2006 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2009 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, bitsize, bitnum,
2010 &bitnum);
2011 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (int_mode));
2012 str_rtx = copy_to_reg (str_rtx);
2015 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2016 target, mode, tmode, reverse, true, alt_rtl);
2019 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2020 from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0,
2021 otherwise OP0 is a BLKmode MEM.
2023 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2024 If REVERSE is true, the extraction is to be done in reverse order.
2026 If TARGET is nonzero, attempts to store the value there
2027 and return TARGET, but this is not guaranteed.
2028 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
2030 static rtx
2031 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2032 opt_scalar_int_mode op0_mode,
2033 unsigned HOST_WIDE_INT bitsize,
2034 unsigned HOST_WIDE_INT bitnum, rtx target,
2035 int unsignedp, bool reverse)
2037 scalar_int_mode mode;
2038 if (MEM_P (op0))
2040 if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2041 BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2042 /* The only way this should occur is if the field spans word
2043 boundaries. */
2044 return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2045 unsignedp, reverse);
2047 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2049 else
2050 mode = op0_mode.require ();
2052 return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2053 target, unsignedp, reverse);
2056 /* Helper function for extract_fixed_bit_field, extracts
2057 the bit field always using MODE, which is the mode of OP0.
2058 The other arguments are as for extract_fixed_bit_field. */
2060 static rtx
2061 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2062 unsigned HOST_WIDE_INT bitsize,
2063 unsigned HOST_WIDE_INT bitnum, rtx target,
2064 int unsignedp, bool reverse)
2066 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2067 for invalid input, such as extract equivalent of f5 from
2068 gcc.dg/pr48335-2.c. */
2070 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2071 /* BITNUM is the distance between our msb and that of OP0.
2072 Convert it to the distance from the lsb. */
2073 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
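/* E.g. (illustrative) with a 32-bit MODE, BYTES_BIG_ENDIAN and !REVERSE,
   a 3-bit field recorded at BITNUM 5 from the msb is at BITNUM
   32 - 3 - 5 == 24 from the lsb.  */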
2075 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2076 We have reduced the big-endian case to the little-endian case. */
2077 if (reverse)
2078 op0 = flip_storage_order (mode, op0);
2080 if (unsignedp)
2082 if (bitnum)
2084 /* If the field does not already start at the lsb,
2085 shift it so it does. */
2086 /* Maybe propagate the target for the shift. */
2087 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2088 if (tmode != mode)
2089 subtarget = 0;
2090 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2092 /* Convert the value to the desired mode. TMODE must also be a
2093 scalar integer for this conversion to make sense, since we
2094 shouldn't reinterpret the bits. */
2095 scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2096 if (mode != new_mode)
2097 op0 = convert_to_mode (new_mode, op0, 1);
2099 /* Unless the msb of the field used to be the msb when we shifted,
2100 mask out the upper bits. */
2102 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2103 return expand_binop (new_mode, and_optab, op0,
2104 mask_rtx (new_mode, 0, bitsize, 0),
2105 target, 1, OPTAB_LIB_WIDEN);
2106 return op0;
2109 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2110 then arithmetic-shift its lsb to the lsb of the word. */
2111 op0 = force_reg (mode, op0);
2113 /* Find the narrowest integer mode that contains the field. */
2115 opt_scalar_int_mode mode_iter;
2116 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2117 if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2118 break;
2120 mode = mode_iter.require ();
2121 op0 = convert_to_mode (mode, op0, 0);
2123 if (mode != tmode)
2124 target = 0;
2126 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2128 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2129 /* Maybe propagate the target for the shift. */
2130 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2131 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2134 return expand_shift (RSHIFT_EXPR, mode, op0,
2135 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2138 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2139 VALUE << BITPOS. */
2141 static rtx
2142 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2143 int bitpos)
2145 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2148 /* Extract a bit field that is split across two words
2149 and return an RTX for the result.
2151 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2152 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2153 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2154 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2155 a BLKmode MEM.
2157 If REVERSE is true, the extraction is to be done in reverse order. */
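/* Illustrative example (not from the original source): on a little-endian
   target with 32-bit words, a 16-bit field starting at bit 24 is read as
   8 bits from the first word and 8 bits from the second word, the second
   part shifted left by 8 and IORed into the first.  */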
2159 static rtx
2160 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2161 unsigned HOST_WIDE_INT bitsize,
2162 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2163 bool reverse)
2165 unsigned int unit;
2166 unsigned int bitsdone = 0;
2167 rtx result = NULL_RTX;
2168 int first = 1;
2170 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2171 much at a time. */
2172 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2173 unit = BITS_PER_WORD;
2174 else
2175 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2177 while (bitsdone < bitsize)
2179 unsigned HOST_WIDE_INT thissize;
2180 rtx part;
2181 unsigned HOST_WIDE_INT thispos;
2182 unsigned HOST_WIDE_INT offset;
2184 offset = (bitpos + bitsdone) / unit;
2185 thispos = (bitpos + bitsdone) % unit;
2187 /* THISSIZE must not overrun a word boundary. Otherwise,
2188 extract_fixed_bit_field will call us again, and we will mutually
2189 recurse forever. */
2190 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2191 thissize = MIN (thissize, unit - thispos);
2193 /* If OP0 is a register, then handle OFFSET here. */
2194 rtx op0_piece = op0;
2195 opt_scalar_int_mode op0_piece_mode = op0_mode;
2196 if (SUBREG_P (op0) || REG_P (op0))
2198 op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2199 op0_piece_mode = word_mode;
2200 offset = 0;
2203 /* Extract the parts in bit-counting order,
2204 whose meaning is determined by BYTES_PER_UNIT.
2205 OFFSET is in UNITs, and UNIT is in bits. */
2206 part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2207 thissize, offset * unit + thispos,
2208 0, 1, reverse);
2209 bitsdone += thissize;
2211 /* Shift this part into place for the result. */
2212 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2214 if (bitsize != bitsdone)
2215 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2216 bitsize - bitsdone, 0, 1);
2218 else
2220 if (bitsdone != thissize)
2221 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2222 bitsdone - thissize, 0, 1);
2225 if (first)
2226 result = part;
2227 else
2228 /* Combine the parts with bitwise or. This works
2229 because we extracted each part as an unsigned bit field. */
2230 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2231 OPTAB_LIB_WIDEN);
2233 first = 0;
2236 /* Unsigned bit field: we are done. */
2237 if (unsignedp)
2238 return result;
2239 /* Signed bit field: sign-extend with two arithmetic shifts. */
2240 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2241 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2242 return expand_shift (RSHIFT_EXPR, word_mode, result,
2243 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2246 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2247 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2248 MODE, fill the upper bits with zeros. Fail if the layout of either
2249 mode is unknown (as for CC modes) or if the extraction would involve
2250 unprofitable mode punning. Return the value on success, otherwise
2251 return null.
2253 This is different from gen_lowpart* in these respects:
2255 - the returned value must always be considered an rvalue
2257 - when MODE is wider than SRC_MODE, the extraction involves
2258 a zero extension
2260 - when MODE is smaller than SRC_MODE, the extraction involves
2261 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2263 In other words, this routine performs a computation, whereas the
2264 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2265 operations. */
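/* For example (illustrative), extract_low_bits (SImode, DImode, x)
   typically yields the low 32 bits of X as an SImode rvalue, while
   extract_low_bits (DImode, SImode, y) zero-extends Y to 64 bits; either
   call may return null if the modes cannot be tied cheaply.  */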
2267 rtx
2268 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2270 scalar_int_mode int_mode, src_int_mode;
2272 if (mode == src_mode)
2273 return src;
2275 if (CONSTANT_P (src))
2277 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2278 fails, it will happily create (subreg (symbol_ref)) or similar
2279 invalid SUBREGs. */
2280 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2281 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2282 if (ret)
2283 return ret;
2285 if (GET_MODE (src) == VOIDmode
2286 || !validate_subreg (mode, src_mode, src, byte))
2287 return NULL_RTX;
2289 src = force_reg (GET_MODE (src), src);
2290 return gen_rtx_SUBREG (mode, src, byte);
2293 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2294 return NULL_RTX;
2296 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2297 && MODES_TIEABLE_P (mode, src_mode))
2299 rtx x = gen_lowpart_common (mode, src);
2300 if (x)
2301 return x;
2304 if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2305 || !int_mode_for_mode (mode).exists (&int_mode))
2306 return NULL_RTX;
2308 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2309 return NULL_RTX;
2310 if (!MODES_TIEABLE_P (int_mode, mode))
2311 return NULL_RTX;
2313 src = gen_lowpart (src_int_mode, src);
2314 src = convert_modes (int_mode, src_int_mode, src, true);
2315 src = gen_lowpart (mode, src);
2316 return src;
2319 /* Add INC into TARGET. */
2321 void
2322 expand_inc (rtx target, rtx inc)
2324 rtx value = expand_binop (GET_MODE (target), add_optab,
2325 target, inc,
2326 target, 0, OPTAB_LIB_WIDEN);
2327 if (value != target)
2328 emit_move_insn (target, value);
2331 /* Subtract DEC from TARGET. */
2333 void
2334 expand_dec (rtx target, rtx dec)
2336 rtx value = expand_binop (GET_MODE (target), sub_optab,
2337 target, dec,
2338 target, 0, OPTAB_LIB_WIDEN);
2339 if (value != target)
2340 emit_move_insn (target, value);
2343 /* Output a shift instruction for expression code CODE,
2344 with SHIFTED being the rtx for the value to shift,
2345 and AMOUNT the rtx for the amount to shift by.
2346 Store the result in the rtx TARGET, if that is convenient.
2347 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2348 Return the rtx for where the value is.
2349 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2350 in which case 0 is returned. */
2352 static rtx
2353 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2354 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2356 rtx op1, temp = 0;
2357 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2358 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2359 optab lshift_optab = ashl_optab;
2360 optab rshift_arith_optab = ashr_optab;
2361 optab rshift_uns_optab = lshr_optab;
2362 optab lrotate_optab = rotl_optab;
2363 optab rrotate_optab = rotr_optab;
2364 machine_mode op1_mode;
2365 machine_mode scalar_mode = mode;
2366 int attempt;
2367 bool speed = optimize_insn_for_speed_p ();
2369 if (VECTOR_MODE_P (mode))
2370 scalar_mode = GET_MODE_INNER (mode);
2371 op1 = amount;
2372 op1_mode = GET_MODE (op1);
2374 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2375 shift amount is a vector, use the vector/vector shift patterns. */
2376 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2378 lshift_optab = vashl_optab;
2379 rshift_arith_optab = vashr_optab;
2380 rshift_uns_optab = vlshr_optab;
2381 lrotate_optab = vrotl_optab;
2382 rrotate_optab = vrotr_optab;
2385 /* We used to detect shift counts computed by NEGATE_EXPR here and
2386 shift in the other direction, but that does not work
2387 on all machines. */
2389 if (SHIFT_COUNT_TRUNCATED)
2391 if (CONST_INT_P (op1)
2392 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2393 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2394 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2395 % GET_MODE_BITSIZE (scalar_mode));
2396 else if (GET_CODE (op1) == SUBREG
2397 && subreg_lowpart_p (op1)
2398 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2399 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2400 op1 = SUBREG_REG (op1);
2403 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2404 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2405 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2406 amount instead. */
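/* E.g. (illustrative) in SImode a rotate left by 25 is rewritten as a
   rotate right by 32 - 25 == 7.  */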
2407 if (rotate
2408 && CONST_INT_P (op1)
2409 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2410 GET_MODE_BITSIZE (scalar_mode) - 1))
2412 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2413 left = !left;
2414 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2417 /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2418 Note that this is not the case for bigger values. For instance a rotation
2419 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2420 0x04030201 (bswapsi). */
2421 if (rotate
2422 && CONST_INT_P (op1)
2423 && INTVAL (op1) == BITS_PER_UNIT
2424 && GET_MODE_SIZE (scalar_mode) == 2
2425 && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2426 return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2427 unsignedp);
2429 if (op1 == const0_rtx)
2430 return shifted;
2432 /* Check whether it's cheaper to implement a left shift by a constant
2433 bit count by a sequence of additions. */
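/* Illustrative example (not from the original source): if a shift by 2
   costs more than two additions, x << 2 is emitted as
   t = x + x; t = t + t.  */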
2434 if (code == LSHIFT_EXPR
2435 && CONST_INT_P (op1)
2436 && INTVAL (op1) > 0
2437 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2438 && INTVAL (op1) < MAX_BITS_PER_WORD
2439 && (shift_cost (speed, mode, INTVAL (op1))
2440 > INTVAL (op1) * add_cost (speed, mode))
2441 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2443 int i;
2444 for (i = 0; i < INTVAL (op1); i++)
2446 temp = force_reg (mode, shifted);
2447 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2448 unsignedp, OPTAB_LIB_WIDEN);
2450 return shifted;
2453 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2455 enum optab_methods methods;
2457 if (attempt == 0)
2458 methods = OPTAB_DIRECT;
2459 else if (attempt == 1)
2460 methods = OPTAB_WIDEN;
2461 else
2462 methods = OPTAB_LIB_WIDEN;
2464 if (rotate)
2466 /* Widening does not work for rotation. */
2467 if (methods == OPTAB_WIDEN)
2468 continue;
2469 else if (methods == OPTAB_LIB_WIDEN)
2471 /* If we have been unable to open-code this by a rotation,
2472 do it as the IOR of two shifts. I.e., to rotate A
2473 by N bits, compute
2474 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2475 where C is the bitsize of A.
2477 It is theoretically possible that the target machine might
2478 not be able to perform either shift and hence we would
2479 be making two libcalls rather than just the one for the
2480 shift (similarly if IOR could not be done). We will allow
2481 this extremely unlikely lossage to avoid complicating the
2482 code below. */
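/* Illustrative sketch (assuming a 32-bit mode): for a variable amount N
   this emits (A << N) | ((unsigned) A >> ((-N) & 31)), which is also
   well defined for N == 0.  */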
2484 rtx subtarget = target == shifted ? 0 : target;
2485 rtx new_amount, other_amount;
2486 rtx temp1;
2488 new_amount = op1;
2489 if (op1 == const0_rtx)
2490 return shifted;
2491 else if (CONST_INT_P (op1))
2492 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2493 - INTVAL (op1));
2494 else
2496 other_amount
2497 = simplify_gen_unary (NEG, GET_MODE (op1),
2498 op1, GET_MODE (op1));
2499 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2500 other_amount
2501 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2502 gen_int_mode (mask, GET_MODE (op1)));
2505 shifted = force_reg (mode, shifted);
2507 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2508 mode, shifted, new_amount, 0, 1);
2509 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2510 mode, shifted, other_amount,
2511 subtarget, 1);
2512 return expand_binop (mode, ior_optab, temp, temp1, target,
2513 unsignedp, methods);
2516 temp = expand_binop (mode,
2517 left ? lrotate_optab : rrotate_optab,
2518 shifted, op1, target, unsignedp, methods);
2520 else if (unsignedp)
2521 temp = expand_binop (mode,
2522 left ? lshift_optab : rshift_uns_optab,
2523 shifted, op1, target, unsignedp, methods);
2525 /* Do arithmetic shifts.
2526 Also, if we are going to widen the operand, we can just as well
2527 use an arithmetic right-shift instead of a logical one. */
2528 if (temp == 0 && ! rotate
2529 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2531 enum optab_methods methods1 = methods;
2533 /* If trying to widen a log shift to an arithmetic shift,
2534 don't accept an arithmetic shift of the same size. */
2535 if (unsignedp)
2536 methods1 = OPTAB_MUST_WIDEN;
2538 /* Arithmetic shift */
2540 temp = expand_binop (mode,
2541 left ? lshift_optab : rshift_arith_optab,
2542 shifted, op1, target, unsignedp, methods1);
2545 /* We used to try extzv here for logical right shifts, but that was
2546 only useful for one machine, the VAX, and caused poor code
2547 generation there for lshrdi3, so the code was deleted and a
2548 define_expand for lshrsi3 was added to vax.md. */
2551 gcc_assert (temp != NULL_RTX || may_fail);
2552 return temp;
2555 /* Output a shift instruction for expression code CODE,
2556 with SHIFTED being the rtx for the value to shift,
2557 and AMOUNT the amount to shift by.
2558 Store the result in the rtx TARGET, if that is convenient.
2559 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2560 Return the rtx for where the value is. */
2562 rtx
2563 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2564 int amount, rtx target, int unsignedp)
2566 return expand_shift_1 (code, mode,
2567 shifted, GEN_INT (amount), target, unsignedp);
2570 /* Likewise, but return 0 if that cannot be done. */
2572 static rtx
2573 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2574 int amount, rtx target, int unsignedp)
2576 return expand_shift_1 (code, mode,
2577 shifted, GEN_INT (amount), target, unsignedp, true);
2580 /* Output a shift instruction for expression code CODE,
2581 with SHIFTED being the rtx for the value to shift,
2582 and AMOUNT the tree for the amount to shift by.
2583 Store the result in the rtx TARGET, if that is convenient.
2584 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2585 Return the rtx for where the value is. */
2587 rtx
2588 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2589 tree amount, rtx target, int unsignedp)
2591 return expand_shift_1 (code, mode,
2592 shifted, expand_normal (amount), target, unsignedp);
2596 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2597 const struct mult_cost *, machine_mode mode);
2598 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2599 const struct algorithm *, enum mult_variant);
2600 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2601 static rtx extract_high_half (scalar_int_mode, rtx);
2602 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2603 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2604 int, int);
2605 /* Compute and return the best algorithm for multiplying by T.
2606 The algorithm must cost less than cost_limit.
2607 If retval.cost >= COST_LIMIT, no algorithm was found and all
2608 other fields of the returned struct are undefined.
2609 MODE is the machine mode of the multiplication. */
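/* Illustrative example (not from the original source): T == 10 can be
   synthesized as 10 == ((1 << 2) + 1) << 1, i.e. x*10 == ((x << 2) + x) << 1,
   one shift-add followed by a final shift.  */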
2611 static void
2612 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2613 const struct mult_cost *cost_limit, machine_mode mode)
2615 int m;
2616 struct algorithm *alg_in, *best_alg;
2617 struct mult_cost best_cost;
2618 struct mult_cost new_limit;
2619 int op_cost, op_latency;
2620 unsigned HOST_WIDE_INT orig_t = t;
2621 unsigned HOST_WIDE_INT q;
2622 int maxm, hash_index;
2623 bool cache_hit = false;
2624 enum alg_code cache_alg = alg_zero;
2625 bool speed = optimize_insn_for_speed_p ();
2626 scalar_int_mode imode;
2627 struct alg_hash_entry *entry_ptr;
2629 /* Indicate that no algorithm is yet found. If no algorithm
2630 is found, this value will be returned and indicate failure. */
2631 alg_out->cost.cost = cost_limit->cost + 1;
2632 alg_out->cost.latency = cost_limit->latency + 1;
2634 if (cost_limit->cost < 0
2635 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2636 return;
2638 /* Be prepared for vector modes. */
2639 imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2641 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2643 /* Restrict the bits of "t" to the multiplication's mode. */
2644 t &= GET_MODE_MASK (imode);
2646 /* t == 1 can be done in zero cost. */
2647 if (t == 1)
2649 alg_out->ops = 1;
2650 alg_out->cost.cost = 0;
2651 alg_out->cost.latency = 0;
2652 alg_out->op[0] = alg_m;
2653 return;
2656 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2657 fail now. */
2658 if (t == 0)
2660 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2661 return;
2662 else
2664 alg_out->ops = 1;
2665 alg_out->cost.cost = zero_cost (speed);
2666 alg_out->cost.latency = zero_cost (speed);
2667 alg_out->op[0] = alg_zero;
2668 return;
2672 /* We'll be needing a couple extra algorithm structures now. */
2674 alg_in = XALLOCA (struct algorithm);
2675 best_alg = XALLOCA (struct algorithm);
2676 best_cost = *cost_limit;
2678 /* Compute the hash index. */
2679 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2681 /* See if we already know what to do for T. */
2682 entry_ptr = alg_hash_entry_ptr (hash_index);
2683 if (entry_ptr->t == t
2684 && entry_ptr->mode == mode
2685 && entry_ptr->speed == speed
2686 && entry_ptr->alg != alg_unknown)
2688 cache_alg = entry_ptr->alg;
2690 if (cache_alg == alg_impossible)
2692 /* The cache tells us that it's impossible to synthesize
2693 multiplication by T within entry_ptr->cost. */
2694 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2695 /* COST_LIMIT is at least as restrictive as the one
2696 recorded in the hash table, in which case we have no
2697 hope of synthesizing a multiplication. Just
2698 return. */
2699 return;
2701 /* If we get here, COST_LIMIT is less restrictive than the
2702 one recorded in the hash table, so we may be able to
2703 synthesize a multiplication. Proceed as if we didn't
2704 have the cache entry. */
2706 else
2708 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2709 /* The cached algorithm shows that this multiplication
2710 requires more cost than COST_LIMIT. Just return. This
2711 way, we don't clobber this cache entry with
2712 alg_impossible but retain useful information. */
2713 return;
2715 cache_hit = true;
2717 switch (cache_alg)
2719 case alg_shift:
2720 goto do_alg_shift;
2722 case alg_add_t_m2:
2723 case alg_sub_t_m2:
2724 goto do_alg_addsub_t_m2;
2726 case alg_add_factor:
2727 case alg_sub_factor:
2728 goto do_alg_addsub_factor;
2730 case alg_add_t2_m:
2731 goto do_alg_add_t2_m;
2733 case alg_sub_t2_m:
2734 goto do_alg_sub_t2_m;
2736 default:
2737 gcc_unreachable ();
2742 /* If we have a group of zero bits at the low-order part of T, try
2743 multiplying by the remaining bits and then doing a shift. */
2745 if ((t & 1) == 0)
2747 do_alg_shift:
2748 m = ctz_or_zero (t); /* m = number of low zero bits */
2749 if (m < maxm)
2751 q = t >> m;
2752 /* The function expand_shift will choose between a shift and
2753 a sequence of additions, so the observed cost is given as
2754 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2755 op_cost = m * add_cost (speed, mode);
2756 if (shift_cost (speed, mode, m) < op_cost)
2757 op_cost = shift_cost (speed, mode, m);
2758 new_limit.cost = best_cost.cost - op_cost;
2759 new_limit.latency = best_cost.latency - op_cost;
2760 synth_mult (alg_in, q, &new_limit, mode);
2762 alg_in->cost.cost += op_cost;
2763 alg_in->cost.latency += op_cost;
2764 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2766 best_cost = alg_in->cost;
2767 std::swap (alg_in, best_alg);
2768 best_alg->log[best_alg->ops] = m;
2769 best_alg->op[best_alg->ops] = alg_shift;
2772 /* See if treating ORIG_T as a signed number yields a better
2773 sequence. Try this sequence only for a negative ORIG_T
2774 as it would be useless for a non-negative ORIG_T. */
2775 if ((HOST_WIDE_INT) orig_t < 0)
2777 /* Shift ORIG_T as follows because a right shift of a
2778 negative-valued signed type is implementation
2779 defined. */
2780 q = ~(~orig_t >> m);
2781 /* The function expand_shift will choose between a shift
2782 and a sequence of additions, so the observed cost is
2783 given as MIN (m * add_cost(speed, mode),
2784 shift_cost(speed, mode, m)). */
2785 op_cost = m * add_cost (speed, mode);
2786 if (shift_cost (speed, mode, m) < op_cost)
2787 op_cost = shift_cost (speed, mode, m);
2788 new_limit.cost = best_cost.cost - op_cost;
2789 new_limit.latency = best_cost.latency - op_cost;
2790 synth_mult (alg_in, q, &new_limit, mode);
2792 alg_in->cost.cost += op_cost;
2793 alg_in->cost.latency += op_cost;
2794 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2796 best_cost = alg_in->cost;
2797 std::swap (alg_in, best_alg);
2798 best_alg->log[best_alg->ops] = m;
2799 best_alg->op[best_alg->ops] = alg_shift;
2803 if (cache_hit)
2804 goto done;
2807 /* If we have an odd number, add or subtract one. */
2808 if ((t & 1) != 0)
2810 unsigned HOST_WIDE_INT w;
2812 do_alg_addsub_t_m2:
2813 for (w = 1; (w & t) != 0; w <<= 1)
2814 ;
2815 /* If T was -1, then W will be zero after the loop. This is another
2816 case where T ends with ...111. Handling this with (T + 1) and
2817 subtract 1 produces slightly better code and results in algorithm
2818 selection much faster than treating it like the ...0111 case
2819 below. */
2820 if (w == 0
2821 || (w > 2
2822 /* Reject the case where t is 3.
2823 Thus we prefer addition in that case. */
2824 && t != 3))
2826 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2828 op_cost = add_cost (speed, mode);
2829 new_limit.cost = best_cost.cost - op_cost;
2830 new_limit.latency = best_cost.latency - op_cost;
2831 synth_mult (alg_in, t + 1, &new_limit, mode);
2833 alg_in->cost.cost += op_cost;
2834 alg_in->cost.latency += op_cost;
2835 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2837 best_cost = alg_in->cost;
2838 std::swap (alg_in, best_alg);
2839 best_alg->log[best_alg->ops] = 0;
2840 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2843 else
2845 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2847 op_cost = add_cost (speed, mode);
2848 new_limit.cost = best_cost.cost - op_cost;
2849 new_limit.latency = best_cost.latency - op_cost;
2850 synth_mult (alg_in, t - 1, &new_limit, mode);
2852 alg_in->cost.cost += op_cost;
2853 alg_in->cost.latency += op_cost;
2854 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2856 best_cost = alg_in->cost;
2857 std::swap (alg_in, best_alg);
2858 best_alg->log[best_alg->ops] = 0;
2859 best_alg->op[best_alg->ops] = alg_add_t_m2;
2863 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2864 quickly with a - a * n for some appropriate constant n. */
2865 m = exact_log2 (-orig_t + 1);
2866 if (m >= 0 && m < maxm)
2868 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2869 /* If the target has a cheap shift-and-subtract insn use
2870 that in preference to a shift insn followed by a sub insn.
2871 Assume that the shift-and-sub is "atomic" with a latency
2872 equal to its cost, otherwise assume that on superscalar
2873 hardware the shift may be executed concurrently with the
2874 earlier steps in the algorithm. */
2875 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2877 op_cost = shiftsub1_cost (speed, mode, m);
2878 op_latency = op_cost;
2880 else
2881 op_latency = add_cost (speed, mode);
2883 new_limit.cost = best_cost.cost - op_cost;
2884 new_limit.latency = best_cost.latency - op_latency;
2885 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2886 &new_limit, mode);
2888 alg_in->cost.cost += op_cost;
2889 alg_in->cost.latency += op_latency;
2890 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2892 best_cost = alg_in->cost;
2893 std::swap (alg_in, best_alg);
2894 best_alg->log[best_alg->ops] = m;
2895 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2899 if (cache_hit)
2900 goto done;
2903 /* Look for factors of t of the form
2904 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2905 If we find such a factor, we can multiply by t using an algorithm that
2906 multiplies by q, shift the result by m and add/subtract it to itself.
2908 We search for large factors first and loop down, even if large factors
2909 are less probable than small; if we find a large factor we will find a
2910 good sequence quickly, and therefore be able to prune (by decreasing
2911 COST_LIMIT) the search. */
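/* E.g. (illustrative) t == 45 has the factor 9 == (1 << 3) + 1, so x*45
   can be formed as x*5 followed by (x*5 << 3) + x*5.  */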
2913 do_alg_addsub_factor:
2914 for (m = floor_log2 (t - 1); m >= 2; m--)
2916 unsigned HOST_WIDE_INT d;
2918 d = (HOST_WIDE_INT_1U << m) + 1;
2919 if (t % d == 0 && t > d && m < maxm
2920 && (!cache_hit || cache_alg == alg_add_factor))
2922 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2923 if (shiftadd_cost (speed, mode, m) <= op_cost)
2924 op_cost = shiftadd_cost (speed, mode, m);
2926 op_latency = op_cost;
2929 new_limit.cost = best_cost.cost - op_cost;
2930 new_limit.latency = best_cost.latency - op_latency;
2931 synth_mult (alg_in, t / d, &new_limit, mode);
2933 alg_in->cost.cost += op_cost;
2934 alg_in->cost.latency += op_latency;
2935 if (alg_in->cost.latency < op_cost)
2936 alg_in->cost.latency = op_cost;
2937 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2939 best_cost = alg_in->cost;
2940 std::swap (alg_in, best_alg);
2941 best_alg->log[best_alg->ops] = m;
2942 best_alg->op[best_alg->ops] = alg_add_factor;
2944 /* Other factors will have been taken care of in the recursion. */
2945 break;
2948 d = (HOST_WIDE_INT_1U << m) - 1;
2949 if (t % d == 0 && t > d && m < maxm
2950 && (!cache_hit || cache_alg == alg_sub_factor))
2952 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2953 if (shiftsub0_cost (speed, mode, m) <= op_cost)
2954 op_cost = shiftsub0_cost (speed, mode, m);
2956 op_latency = op_cost;
2958 new_limit.cost = best_cost.cost - op_cost;
2959 new_limit.latency = best_cost.latency - op_latency;
2960 synth_mult (alg_in, t / d, &new_limit, mode);
2962 alg_in->cost.cost += op_cost;
2963 alg_in->cost.latency += op_latency;
2964 if (alg_in->cost.latency < op_cost)
2965 alg_in->cost.latency = op_cost;
2966 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2968 best_cost = alg_in->cost;
2969 std::swap (alg_in, best_alg);
2970 best_alg->log[best_alg->ops] = m;
2971 best_alg->op[best_alg->ops] = alg_sub_factor;
2973 break;
2976 if (cache_hit)
2977 goto done;
2979 /* Try shift-and-add (load effective address) instructions,
2980 i.e. do a*3, a*5, a*9. */
2981 if ((t & 1) != 0)
2983 do_alg_add_t2_m:
2984 q = t - 1;
2985 m = ctz_hwi (q);
2986 if (q && m < maxm)
2988 op_cost = shiftadd_cost (speed, mode, m);
2989 new_limit.cost = best_cost.cost - op_cost;
2990 new_limit.latency = best_cost.latency - op_cost;
2991 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2993 alg_in->cost.cost += op_cost;
2994 alg_in->cost.latency += op_cost;
2995 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2997 best_cost = alg_in->cost;
2998 std::swap (alg_in, best_alg);
2999 best_alg->log[best_alg->ops] = m;
3000 best_alg->op[best_alg->ops] = alg_add_t2_m;
3003 if (cache_hit)
3004 goto done;
3006 do_alg_sub_t2_m:
3007 q = t + 1;
3008 m = ctz_hwi (q);
3009 if (q && m < maxm)
3011 op_cost = shiftsub0_cost (speed, mode, m);
3012 new_limit.cost = best_cost.cost - op_cost;
3013 new_limit.latency = best_cost.latency - op_cost;
3014 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3016 alg_in->cost.cost += op_cost;
3017 alg_in->cost.latency += op_cost;
3018 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3020 best_cost = alg_in->cost;
3021 std::swap (alg_in, best_alg);
3022 best_alg->log[best_alg->ops] = m;
3023 best_alg->op[best_alg->ops] = alg_sub_t2_m;
3026 if (cache_hit)
3027 goto done;
3030 done:
3031 /* If best_cost has not decreased, we have not found any algorithm. */
3032 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3034 /* We failed to find an algorithm. Record alg_impossible for
3035 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3036 we are asked to find an algorithm for T within the same or
3037 lower COST_LIMIT, we can immediately return to the
3038 caller. */
3039 entry_ptr->t = t;
3040 entry_ptr->mode = mode;
3041 entry_ptr->speed = speed;
3042 entry_ptr->alg = alg_impossible;
3043 entry_ptr->cost = *cost_limit;
3044 return;
3047 /* Cache the result. */
3048 if (!cache_hit)
3050 entry_ptr->t = t;
3051 entry_ptr->mode = mode;
3052 entry_ptr->speed = speed;
3053 entry_ptr->alg = best_alg->op[best_alg->ops];
3054 entry_ptr->cost.cost = best_cost.cost;
3055 entry_ptr->cost.latency = best_cost.latency;
3058 /* If we are getting a too long sequence for `struct algorithm'
3059 to record, make this search fail. */
3060 if (best_alg->ops == MAX_BITS_PER_WORD)
3061 return;
3063 /* Copy the algorithm from temporary space to the space at alg_out.
3064 We avoid using structure assignment because the majority of
3065 best_alg is normally undefined, and this is a critical function. */
3066 alg_out->ops = best_alg->ops + 1;
3067 alg_out->cost = best_cost;
3068 memcpy (alg_out->op, best_alg->op,
3069 alg_out->ops * sizeof *alg_out->op);
3070 memcpy (alg_out->log, best_alg->log,
3071 alg_out->ops * sizeof *alg_out->log);
3074 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3075 Try three variations:
3077 - a shift/add sequence based on VAL itself
3078 - a shift/add sequence based on -VAL, followed by a negation
3079 - a shift/add sequence based on VAL - 1, followed by an addition.
3081 Return true if the cheapest of these cost less than MULT_COST,
3082 describing the algorithm in *ALG and final fixup in *VARIANT. */
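/* Illustrative examples (not from the original source): a multiply by -3
   may come out as -((x << 1) + x) via negate_variant, and a multiply by 7
   as (((x << 1) + x) << 1) + x via add_variant (VAL - 1 == 6), whenever
   those beat the direct decomposition of VAL.  */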
3084 bool
3085 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3086 struct algorithm *alg, enum mult_variant *variant,
3087 int mult_cost)
3089 struct algorithm alg2;
3090 struct mult_cost limit;
3091 int op_cost;
3092 bool speed = optimize_insn_for_speed_p ();
3094 /* Fail quickly for impossible bounds. */
3095 if (mult_cost < 0)
3096 return false;
3098 /* Ensure that mult_cost provides a reasonable upper bound.
3099 Any constant multiplication can be performed with less
3100 than 2 * bits additions. */
3101 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3102 if (mult_cost > op_cost)
3103 mult_cost = op_cost;
3105 *variant = basic_variant;
3106 limit.cost = mult_cost;
3107 limit.latency = mult_cost;
3108 synth_mult (alg, val, &limit, mode);
3110 /* This works only if the inverted value actually fits in an
3111 `unsigned int' */
3112 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3114 op_cost = neg_cost (speed, mode);
3115 if (MULT_COST_LESS (&alg->cost, mult_cost))
3117 limit.cost = alg->cost.cost - op_cost;
3118 limit.latency = alg->cost.latency - op_cost;
3120 else
3122 limit.cost = mult_cost - op_cost;
3123 limit.latency = mult_cost - op_cost;
3126 synth_mult (&alg2, -val, &limit, mode);
3127 alg2.cost.cost += op_cost;
3128 alg2.cost.latency += op_cost;
3129 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3130 *alg = alg2, *variant = negate_variant;
3133 /* This proves very useful for division-by-constant. */
3134 op_cost = add_cost (speed, mode);
3135 if (MULT_COST_LESS (&alg->cost, mult_cost))
3137 limit.cost = alg->cost.cost - op_cost;
3138 limit.latency = alg->cost.latency - op_cost;
3140 else
3142 limit.cost = mult_cost - op_cost;
3143 limit.latency = mult_cost - op_cost;
3146 synth_mult (&alg2, val - 1, &limit, mode);
3147 alg2.cost.cost += op_cost;
3148 alg2.cost.latency += op_cost;
3149 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3150 *alg = alg2, *variant = add_variant;
3152 return MULT_COST_LESS (&alg->cost, mult_cost);
3155 /* A subroutine of expand_mult, used for constant multiplications.
3156 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3157 convenient. Use the shift/add sequence described by ALG and apply
3158 the final fixup specified by VARIANT. */
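/* Illustrative example (not from the original source): for VAL == 10 one
   possible ALG is accum = op0; accum = (accum << 2) + op0; accum <<= 1,
   with val_so_far tracking 1, 5 and then 10.  */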
3160 static rtx
3161 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3162 rtx target, const struct algorithm *alg,
3163 enum mult_variant variant)
3165 unsigned HOST_WIDE_INT val_so_far;
3166 rtx_insn *insn;
3167 rtx accum, tem;
3168 int opno;
3169 machine_mode nmode;
3171 /* Avoid referencing memory over and over and invalid sharing
3172 on SUBREGs. */
3173 op0 = force_reg (mode, op0);
3175 /* ACCUM starts out either as OP0 or as a zero, depending on
3176 the first operation. */
3178 if (alg->op[0] == alg_zero)
3180 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3181 val_so_far = 0;
3183 else if (alg->op[0] == alg_m)
3185 accum = copy_to_mode_reg (mode, op0);
3186 val_so_far = 1;
3188 else
3189 gcc_unreachable ();
3191 for (opno = 1; opno < alg->ops; opno++)
3193 int log = alg->log[opno];
3194 rtx shift_subtarget = optimize ? 0 : accum;
3195 rtx add_target
3196 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3197 && !optimize)
3198 ? target : 0;
3199 rtx accum_target = optimize ? 0 : accum;
3200 rtx accum_inner;
3202 switch (alg->op[opno])
3204 case alg_shift:
3205 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3206 /* REG_EQUAL note will be attached to the following insn. */
3207 emit_move_insn (accum, tem);
3208 val_so_far <<= log;
3209 break;
3211 case alg_add_t_m2:
3212 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3213 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3214 add_target ? add_target : accum_target);
3215 val_so_far += HOST_WIDE_INT_1U << log;
3216 break;
3218 case alg_sub_t_m2:
3219 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3220 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3221 add_target ? add_target : accum_target);
3222 val_so_far -= HOST_WIDE_INT_1U << log;
3223 break;
3225 case alg_add_t2_m:
3226 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3227 log, shift_subtarget, 0);
3228 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3229 add_target ? add_target : accum_target);
3230 val_so_far = (val_so_far << log) + 1;
3231 break;
3233 case alg_sub_t2_m:
3234 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3235 log, shift_subtarget, 0);
3236 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3237 add_target ? add_target : accum_target);
3238 val_so_far = (val_so_far << log) - 1;
3239 break;
3241 case alg_add_factor:
3242 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3243 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3244 add_target ? add_target : accum_target);
3245 val_so_far += val_so_far << log;
3246 break;
3248 case alg_sub_factor:
3249 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3250 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3251 (add_target
3252 ? add_target : (optimize ? 0 : tem)));
3253 val_so_far = (val_so_far << log) - val_so_far;
3254 break;
3256 default:
3257 gcc_unreachable ();
3260 if (SCALAR_INT_MODE_P (mode))
3262 /* Write a REG_EQUAL note on the last insn so that we can cse
3263 multiplication sequences. Note that if ACCUM is a SUBREG,
3264 we've set the inner register and must properly indicate that. */
3265 tem = op0, nmode = mode;
3266 accum_inner = accum;
3267 if (GET_CODE (accum) == SUBREG)
3269 accum_inner = SUBREG_REG (accum);
3270 nmode = GET_MODE (accum_inner);
3271 tem = gen_lowpart (nmode, op0);
3274 insn = get_last_insn ();
3275 set_dst_reg_note (insn, REG_EQUAL,
3276 gen_rtx_MULT (nmode, tem,
3277 gen_int_mode (val_so_far, nmode)),
3278 accum_inner);
3282 if (variant == negate_variant)
3284 val_so_far = -val_so_far;
3285 accum = expand_unop (mode, neg_optab, accum, target, 0);
3287 else if (variant == add_variant)
3289 val_so_far = val_so_far + 1;
3290 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3293 /* Compare only the bits of val and val_so_far that are significant
3294 in the result mode, to avoid sign-/zero-extension confusion. */
3295 nmode = GET_MODE_INNER (mode);
3296 val &= GET_MODE_MASK (nmode);
3297 val_so_far &= GET_MODE_MASK (nmode);
3298 gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3300 return accum;
3303 /* Perform a multiplication and return an rtx for the result.
3304 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3305 TARGET is a suggestion for where to store the result (an rtx).
3307 We check specially for a constant integer as OP1.
3308 If you want this check for OP0 as well, then before calling
3309 you should swap the two operands if OP0 would be constant. */
3311 rtx
3312 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3313 int unsignedp)
3315 enum mult_variant variant;
3316 struct algorithm algorithm;
3317 rtx scalar_op1;
3318 int max_cost;
3319 bool speed = optimize_insn_for_speed_p ();
3320 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3322 if (CONSTANT_P (op0))
3323 std::swap (op0, op1);
3325 /* For vectors, there are several simplifications that can be made if
3326 all elements of the vector constant are identical. */
3327 scalar_op1 = unwrap_const_vec_duplicate (op1);
3329 if (INTEGRAL_MODE_P (mode))
3331 rtx fake_reg;
3332 HOST_WIDE_INT coeff;
3333 bool is_neg;
3334 int mode_bitsize;
3336 if (op1 == CONST0_RTX (mode))
3337 return op1;
3338 if (op1 == CONST1_RTX (mode))
3339 return op0;
3340 if (op1 == CONSTM1_RTX (mode))
3341 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3342 op0, target, 0);
3344 if (do_trapv)
3345 goto skip_synth;
3347 /* If mode is integer vector mode, check if the backend supports
3348 vector lshift (by scalar or vector) at all. If not, we can't use
3349 synthesized multiply. */
3350 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3351 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3352 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3353 goto skip_synth;
3355 /* These are the operations that are potentially turned into
3356 a sequence of shifts and additions. */
3357 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3359 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3360 less than or equal in size to `unsigned int' this doesn't matter.
3361 If the mode is larger than `unsigned int', then synth_mult works
3362 only if the constant value exactly fits in an `unsigned int' without
3363 any truncation. This means that multiplying by negative values does
3364 not work; results are off by 2^32 on a 32 bit machine. */
3365 if (CONST_INT_P (scalar_op1))
3367 coeff = INTVAL (scalar_op1);
3368 is_neg = coeff < 0;
3370 #if TARGET_SUPPORTS_WIDE_INT
3371 else if (CONST_WIDE_INT_P (scalar_op1))
3372 #else
3373 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3374 #endif
3376 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3377 /* Perfect power of 2 (other than 1, which is handled above). */
3378 if (shift > 0)
3379 return expand_shift (LSHIFT_EXPR, mode, op0,
3380 shift, target, unsignedp);
3381 else
3382 goto skip_synth;
3384 else
3385 goto skip_synth;
3387 /* We used to test optimize here, on the grounds that it's better to
3388 produce a smaller program when -O is not used. But this causes
3389 such a terrible slowdown sometimes that it seems better to always
3390 use synth_mult. */
3392 /* Special case powers of two. */
3393 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3394 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3395 return expand_shift (LSHIFT_EXPR, mode, op0,
3396 floor_log2 (coeff), target, unsignedp);
3398 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3400 /* Attempt to handle multiplication of DImode values by negative
3401 coefficients, by performing the multiplication by a positive
3402 multiplier and then inverting the result. */
3403 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3405 /* It's safe to use -coeff even for INT_MIN, as the
3406 result is interpreted as an unsigned coefficient.
3407 Exclude cost of op0 from max_cost to match the cost
3408 calculation of the synth_mult. */
3409 coeff = -(unsigned HOST_WIDE_INT) coeff;
3410 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3411 mode, speed)
3412 - neg_cost (speed, mode));
3413 if (max_cost <= 0)
3414 goto skip_synth;
3416 /* Special case powers of two. */
3417 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3419 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3420 floor_log2 (coeff), target, unsignedp);
3421 return expand_unop (mode, neg_optab, temp, target, 0);
3424 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3425 max_cost))
3427 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3428 &algorithm, variant);
3429 return expand_unop (mode, neg_optab, temp, target, 0);
3431 goto skip_synth;
3434 /* Exclude cost of op0 from max_cost to match the cost
3435 calculation of the synth_mult. */
3436 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3437 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3438 return expand_mult_const (mode, op0, coeff, target,
3439 &algorithm, variant);
3441 skip_synth:
3443 /* Expand x*2.0 as x+x. */
3444 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3445 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3447 op0 = force_reg (GET_MODE (op0), op0);
3448 return expand_binop (mode, add_optab, op0, op0,
3449 target, unsignedp, OPTAB_LIB_WIDEN);
3452 /* This used to use umul_optab if unsigned, but for non-widening multiply
3453 there is no difference between signed and unsigned. */
3454 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3455 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3456 gcc_assert (op0);
3457 return op0;
3460 /* Return a cost estimate for multiplying a register by the given
3461 COEFFicient in the given MODE and SPEED. */
3463 int
3464 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3466 int max_cost;
3467 struct algorithm algorithm;
3468 enum mult_variant variant;
3470 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3471 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3472 mode, speed);
3473 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3474 return algorithm.cost.cost;
3475 else
3476 return max_cost;
3479 /* Perform a widening multiplication and return an rtx for the result.
3480 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3481 TARGET is a suggestion for where to store the result (an rtx).
3482 THIS_OPTAB is the optab we should use; it must be either umul_widen_optab
3483 or smul_widen_optab.
3485 We check specially for a constant integer as OP1, comparing the
3486 cost of a widening multiply against the cost of a sequence of shifts
3487 and adds. */
3490 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3491 int unsignedp, optab this_optab)
3493 bool speed = optimize_insn_for_speed_p ();
3494 rtx cop1;
3496 if (CONST_INT_P (op1)
3497 && GET_MODE (op0) != VOIDmode
3498 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3499 this_optab == umul_widen_optab))
3500 && CONST_INT_P (cop1)
3501 && (INTVAL (cop1) >= 0
3502 || HWI_COMPUTABLE_MODE_P (mode)))
3504 HOST_WIDE_INT coeff = INTVAL (cop1);
3505 int max_cost;
3506 enum mult_variant variant;
3507 struct algorithm algorithm;
3509 if (coeff == 0)
3510 return CONST0_RTX (mode);
3512 /* Special case powers of two. */
3513 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3515 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3516 return expand_shift (LSHIFT_EXPR, mode, op0,
3517 floor_log2 (coeff), target, unsignedp);
3520 /* Exclude cost of op0 from max_cost to match the cost
3521 calculation of the synth_mult. */
3522 max_cost = mul_widen_cost (speed, mode);
3523 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3524 max_cost))
3526 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3527 return expand_mult_const (mode, op0, coeff, target,
3528 &algorithm, variant);
3531 return expand_binop (mode, this_optab, op0, op1, target,
3532 unsignedp, OPTAB_LIB_WIDEN);
3535 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3536 replace division by D, and put the least significant N bits of the result
3537 in *MULTIPLIER_PTR and return the most significant bit.
3539 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT); the
3540 needed precision is in PRECISION (should be <= N).
3542 PRECISION should be as small as possible so this function can choose
3543 a multiplier more freely.
3545 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
3546 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3548 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3549 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3551 unsigned HOST_WIDE_INT
3552 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3553 unsigned HOST_WIDE_INT *multiplier_ptr,
3554 int *post_shift_ptr, int *lgup_ptr)
3556 int lgup, post_shift;
3557 int pow, pow2;
3559 /* lgup = ceil(log2(divisor)); */
3560 lgup = ceil_log2 (d);
3562 gcc_assert (lgup <= n);
3564 pow = n + lgup;
3565 pow2 = n + lgup - precision;
3567 /* mlow = 2^(N + lgup)/d */
3568 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3569 wide_int mlow = wi::udiv_trunc (val, d);
3571 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3572 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3573 wide_int mhigh = wi::udiv_trunc (val, d);
3575 /* If precision == N, then mlow, mhigh exceed 2^N
3576 (but they do not exceed 2^(N+1)). */
3578 /* Reduce to lowest terms. */
3579 for (post_shift = lgup; post_shift > 0; post_shift--)
3581 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3582 HOST_BITS_PER_WIDE_INT);
3583 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3584 HOST_BITS_PER_WIDE_INT);
3585 if (ml_lo >= mh_lo)
3586 break;
3588 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3589 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3592 *post_shift_ptr = post_shift;
3593 *lgup_ptr = lgup;
3594 if (n < HOST_BITS_PER_WIDE_INT)
3596 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3597 *multiplier_ptr = mhigh.to_uhwi () & mask;
3598 return mhigh.to_uhwi () >= mask;
3600 else
3602 *multiplier_ptr = mhigh.to_uhwi ();
3603 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
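
/* Illustrative sketch (not part of the original file): for N == 32 and
   PRECISION == 32, choose_multiplier (3, 32, 32, ...) produces the multiplier
   0xAAAAAAAB with *POST_SHIFT_PTR == 1 and a zero return value, while
   choose_multiplier (7, 32, 32, ...) produces 0x24924925 with
   *POST_SHIFT_PTR == 3 and returns 1, because the real multiplier needs 33
   bits.  The hypothetical helpers below show how those two outcomes are used;
   they assume a 32-bit unsigned int and a 64-bit unsigned long long.  */

static unsigned int
example_udiv3 (unsigned int x)
{
  /* Returned bit == 0: quotient is a high-part multiply plus one shift.  */
  unsigned int hi = (unsigned long long) x * 0xAAAAAAABu >> 32;
  return hi >> 1;                              /* == x / 3 for all 32-bit x */
}

static unsigned int
example_udiv7 (unsigned int x)
{
  /* Returned bit == 1: the multiplier is really 2^32 + 0x24924925, so the
     "missing" x is added back before shifting by POST_SHIFT - 1.  */
  unsigned int hi = (unsigned long long) x * 0x24924925u >> 32;
  return (((x - hi) >> 1) + hi) >> 2;          /* == x / 7 for all 32-bit x */
}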
3607 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3608 congruent to 1 (mod 2**N). */
3610 static unsigned HOST_WIDE_INT
3611 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3613 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3615 /* The algorithm notes that the choice y = x satisfies
3616 x*y == 1 mod 2^3, since x is assumed odd.
3617 Each iteration doubles the number of bits of significance in y. */
3619 unsigned HOST_WIDE_INT mask;
3620 unsigned HOST_WIDE_INT y = x;
3621 int nbit = 3;
3623 mask = (n == HOST_BITS_PER_WIDE_INT
3624 ? HOST_WIDE_INT_M1U
3625 : (HOST_WIDE_INT_1U << n) - 1);
3627 while (nbit < n)
3629 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3630 nbit *= 2;
3632 return y;
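
/* Illustrative sketch (not part of the original file): the iteration above
   gives, for example, invert_mod2n (3, 32) == 0xAAAAAAAB, since
   3 * 0xAAAAAAAB == 0x200000001 == 1 (mod 2^32).  The EXACT_DIV_EXPR case of
   expand_divmod uses this: an exact division by an odd constant is just a
   multiplication by that constant's inverse modulo 2^N.  The helper name is
   hypothetical; it assumes a 32-bit unsigned int.  */

static unsigned int
example_exact_div3 (unsigned int x)
{
  /* Correct only when x is known to be a multiple of 3, e.g. 21 -> 7.  */
  return x * 0xAAAAAAABu;
}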
3635 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3636 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3637 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3638 to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
3639 become signed.
3641 The result is put in TARGET if that is convenient.
3643 MODE is the mode of operation. */
3646 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3647 rtx op1, rtx target, int unsignedp)
3649 rtx tem;
3650 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3652 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3653 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3654 tem = expand_and (mode, tem, op1, NULL_RTX);
3655 adj_operand
3656 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3657 adj_operand);
3659 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3660 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3661 tem = expand_and (mode, tem, op0, NULL_RTX);
3662 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3663 target);
3665 return target;
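
/* Illustrative sketch (not part of the original file): the adjustment above
   implements the identity, for N-bit two's-complement operands,

     uhigh (a, b) == shigh (a, b) + (a < 0 ? b : 0) + (b < 0 ? a : 0)  (mod 2^N)

   so a high-part product of the wrong signedness is repaired with two
   sign-mask ANDs and two additions (or subtractions, in the other direction).
   The hypothetical check below assumes 32-bit int and 64-bit long long.  */

static unsigned int
example_highpart_adjust (int a, int b)
{
  unsigned int uhigh = (unsigned long long) (unsigned int) a
		       * (unsigned int) b >> 32;
  unsigned int shigh = (unsigned long long) ((long long) a * b) >> 32;
  unsigned int adjusted = shigh
			  + (a < 0 ? (unsigned int) b : 0)
			  + (b < 0 ? (unsigned int) a : 0);
  return uhigh == adjusted;   /* always 1 */
}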
3668 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3670 static rtx
3671 extract_high_half (scalar_int_mode mode, rtx op)
3673 if (mode == word_mode)
3674 return gen_highpart (mode, op);
3676 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3678 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3679 GET_MODE_BITSIZE (mode), 0, 1);
3680 return convert_modes (mode, wider_mode, op, 0);
3683 /* Like expmed_mult_highpart, but only consider using a multiplication
3684 optab. OP1 is an rtx for the constant operand. */
3686 static rtx
3687 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3688 rtx target, int unsignedp, int max_cost)
3690 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3691 optab moptab;
3692 rtx tem;
3693 int size;
3694 bool speed = optimize_insn_for_speed_p ();
3696 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3698 size = GET_MODE_BITSIZE (mode);
3700 /* Firstly, try using a multiplication insn that only generates the needed
3701 high part of the product, and in the sign flavor of unsignedp. */
3702 if (mul_highpart_cost (speed, mode) < max_cost)
3704 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3705 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3706 unsignedp, OPTAB_DIRECT);
3707 if (tem)
3708 return tem;
3711 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3712 Need to adjust the result after the multiplication. */
3713 if (size - 1 < BITS_PER_WORD
3714 && (mul_highpart_cost (speed, mode)
3715 + 2 * shift_cost (speed, mode, size-1)
3716 + 4 * add_cost (speed, mode) < max_cost))
3718 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3719 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3720 unsignedp, OPTAB_DIRECT);
3721 if (tem)
3722 /* We used the wrong signedness. Adjust the result. */
3723 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3724 tem, unsignedp);
3727 /* Try widening multiplication. */
3728 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3729 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3730 && mul_widen_cost (speed, wider_mode) < max_cost)
3732 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3733 unsignedp, OPTAB_WIDEN);
3734 if (tem)
3735 return extract_high_half (mode, tem);
3738 /* Try widening the mode and perform a non-widening multiplication. */
3739 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3740 && size - 1 < BITS_PER_WORD
3741 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3742 < max_cost))
3744 rtx_insn *insns;
3745 rtx wop0, wop1;
3747 /* We need to widen the operands, for example to ensure the
3748 constant multiplier is correctly sign or zero extended.
3749 Use a sequence to clean up any instructions emitted by
3750 the conversions if things don't work out. */
3751 start_sequence ();
3752 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3753 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3754 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3755 unsignedp, OPTAB_WIDEN);
3756 insns = get_insns ();
3757 end_sequence ();
3759 if (tem)
3761 emit_insn (insns);
3762 return extract_high_half (mode, tem);
3766 /* Try widening multiplication of opposite signedness, and adjust. */
3767 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3768 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3769 && size - 1 < BITS_PER_WORD
3770 && (mul_widen_cost (speed, wider_mode)
3771 + 2 * shift_cost (speed, mode, size-1)
3772 + 4 * add_cost (speed, mode) < max_cost))
3774 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3775 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3776 if (tem != 0)
3778 tem = extract_high_half (mode, tem);
3779 /* We used the wrong signedness. Adjust the result. */
3780 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3781 target, unsignedp);
3785 return 0;
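
/* Illustrative sketch (not part of the original file): throughout this code
   the "high part" of a product means the upper half of the double-width
   result.  For a 32-bit mode and unsigned operands it is simply the following
   (assuming a 32-bit unsigned int and a 64-bit unsigned long long); all of
   the strategies tried above are just different ways of obtaining this value
   without materializing a full double-width multiply in MODE.  */

static unsigned int
example_umul_highpart (unsigned int a, unsigned int b)
{
  return (unsigned long long) a * b >> 32;
}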
3788 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3789 putting the high half of the result in TARGET if that is convenient,
3790 and return where the result is. If the operation cannot be performed,
3791 0 is returned.
3793 MODE is the mode of operation and result.
3795 UNSIGNEDP nonzero means unsigned multiply.
3797 MAX_COST is the total allowed cost for the expanded RTL. */
3799 static rtx
3800 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3801 rtx target, int unsignedp, int max_cost)
3803 unsigned HOST_WIDE_INT cnst1;
3804 int extra_cost;
3805 bool sign_adjust = false;
3806 enum mult_variant variant;
3807 struct algorithm alg;
3808 rtx tem;
3809 bool speed = optimize_insn_for_speed_p ();
3811 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3812 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3814 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3816 /* We can't optimize modes wider than BITS_PER_WORD.
3817 ??? We might be able to perform double-word arithmetic if
3818 mode == word_mode; however, all the cost calculations in
3819 synth_mult etc. assume single-word operations. */
3820 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3821 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3822 return expmed_mult_highpart_optab (mode, op0, op1, target,
3823 unsignedp, max_cost);
3825 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3827 /* Check whether we try to multiply by a negative constant. */
3828 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3830 sign_adjust = true;
3831 extra_cost += add_cost (speed, mode);
3834 /* See whether shift/add multiplication is cheap enough. */
3835 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3836 max_cost - extra_cost))
3838 /* See whether the specialized multiplication optabs are
3839 cheaper than the shift/add version. */
3840 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3841 alg.cost.cost + extra_cost);
3842 if (tem)
3843 return tem;
3845 tem = convert_to_mode (wider_mode, op0, unsignedp);
3846 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3847 tem = extract_high_half (mode, tem);
3849 /* Adjust result for signedness. */
3850 if (sign_adjust)
3851 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3853 return tem;
3855 return expmed_mult_highpart_optab (mode, op0, op1, target,
3856 unsignedp, max_cost);
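
/* Illustrative sketch (not part of the original file): the sign_adjust step
   above relies on the fact that masking a negative constant C with
   GET_MODE_MASK (mode) adds exactly 2^N to it, and that extra 2^N contributes
   exactly OP0 to the high half of the product, i.e.

     shigh (a, c) == high (a, c & GET_MODE_MASK) - a     when c < 0.

   The hypothetical check below assumes 32-bit int and 64-bit long long.  */

static unsigned int
example_sign_adjust_highpart (int a)
{
  const int c = -7;
  /* What the synthesized wider-mode multiply by the masked constant yields.  */
  unsigned int computed
    = (unsigned long long) ((long long) a * (unsigned int) c) >> 32;
  /* The signed high part we actually wanted.  */
  unsigned int shigh = (unsigned long long) ((long long) a * c) >> 32;
  return (unsigned int) (computed - (unsigned int) a) == shigh;   /* always 1 */
}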
3860 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3862 static rtx
3863 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3865 rtx result, temp, shift;
3866 rtx_code_label *label;
3867 int logd;
3868 int prec = GET_MODE_PRECISION (mode);
3870 logd = floor_log2 (d);
3871 result = gen_reg_rtx (mode);
3873 /* Avoid conditional branches when they're expensive. */
3874 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3875 && optimize_insn_for_speed_p ())
3877 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3878 mode, 0, -1);
3879 if (signmask)
3881 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3882 signmask = force_reg (mode, signmask);
3883 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3885 /* Use the rtx_cost of an LSHIFTRT instruction to determine
3886 which instruction sequence to use. If logical right shifts
3887 are expensive, then use 2 XORs, 2 SUBs and an AND; otherwise
3888 use an LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3890 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3891 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3892 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3893 > COSTS_N_INSNS (2)))
3895 temp = expand_binop (mode, xor_optab, op0, signmask,
3896 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3897 temp = expand_binop (mode, sub_optab, temp, signmask,
3898 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3899 temp = expand_binop (mode, and_optab, temp,
3900 gen_int_mode (masklow, mode),
3901 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3902 temp = expand_binop (mode, xor_optab, temp, signmask,
3903 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3904 temp = expand_binop (mode, sub_optab, temp, signmask,
3905 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3907 else
3909 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3910 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3911 signmask = force_reg (mode, signmask);
3913 temp = expand_binop (mode, add_optab, op0, signmask,
3914 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3915 temp = expand_binop (mode, and_optab, temp,
3916 gen_int_mode (masklow, mode),
3917 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3918 temp = expand_binop (mode, sub_optab, temp, signmask,
3919 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3921 return temp;
3925 /* Mask contains the mode's signbit and the significant bits of the
3926 modulus. By including the signbit in the operation, many targets
3927 can avoid an explicit compare operation in the following comparison
3928 against zero. */
3929 wide_int mask = wi::mask (logd, false, prec);
3930 mask = wi::set_bit (mask, prec - 1);
3932 temp = expand_binop (mode, and_optab, op0,
3933 immed_wide_int_const (mask, mode),
3934 result, 1, OPTAB_LIB_WIDEN);
3935 if (temp != result)
3936 emit_move_insn (result, temp);
3938 label = gen_label_rtx ();
3939 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3941 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3942 0, OPTAB_LIB_WIDEN);
3944 mask = wi::mask (logd, true, prec);
3945 temp = expand_binop (mode, ior_optab, temp,
3946 immed_wide_int_const (mask, mode),
3947 result, 1, OPTAB_LIB_WIDEN);
3948 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3949 0, OPTAB_LIB_WIDEN);
3950 if (temp != result)
3951 emit_move_insn (result, temp);
3952 emit_label (label);
3953 return result;
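
/* Illustrative sketch (not part of the original file): the branch-free paths
   above compute the truncating signed remainder by d == 2^logd as

     bias = (x < 0 ? d - 1 : 0);   r = ((x + bias) & (d - 1)) - bias;

   with BIAS materialized from a sign mask instead of a branch.  The
   hypothetical helper assumes 32-bit int, 0 < logd < 32, and an arithmetic
   right shift of negative values.  */

static int
example_smod_pow2 (int x, int logd)
{
  int d_minus_1 = (int) ((1u << logd) - 1);
  int bias = (x >> 31) & d_minus_1;        /* d - 1 if x < 0, else 0 */
  return ((x + bias) & d_minus_1) - bias;  /* == x % (1 << logd) */
}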
3956 /* Expand signed division of OP0 by a power of two D in mode MODE.
3957 This routine is only called for positive values of D. */
3959 static rtx
3960 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3962 rtx temp;
3963 rtx_code_label *label;
3964 int logd;
3966 logd = floor_log2 (d);
3968 if (d == 2
3969 && BRANCH_COST (optimize_insn_for_speed_p (),
3970 false) >= 1)
3972 temp = gen_reg_rtx (mode);
3973 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3974 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3975 0, OPTAB_LIB_WIDEN);
3976 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3979 if (HAVE_conditional_move
3980 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3982 rtx temp2;
3984 start_sequence ();
3985 temp2 = copy_to_mode_reg (mode, op0);
3986 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3987 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3988 temp = force_reg (mode, temp);
3990 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3991 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3992 mode, temp, temp2, mode, 0);
3993 if (temp2)
3995 rtx_insn *seq = get_insns ();
3996 end_sequence ();
3997 emit_insn (seq);
3998 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4000 end_sequence ();
4003 if (BRANCH_COST (optimize_insn_for_speed_p (),
4004 false) >= 2)
4006 int ushift = GET_MODE_BITSIZE (mode) - logd;
4008 temp = gen_reg_rtx (mode);
4009 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4010 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4011 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4012 > COSTS_N_INSNS (1))
4013 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
4014 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4015 else
4016 temp = expand_shift (RSHIFT_EXPR, mode, temp,
4017 ushift, NULL_RTX, 1);
4018 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4019 0, OPTAB_LIB_WIDEN);
4020 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4023 label = gen_label_rtx ();
4024 temp = copy_to_mode_reg (mode, op0);
4025 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4026 expand_inc (temp, gen_int_mode (d - 1, mode));
4027 emit_label (label);
4028 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
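
/* Illustrative sketch (not part of the original file): every variant above
   computes the truncating signed division by d == 2^logd as

     q = (x + (x < 0 ? d - 1 : 0)) >> logd;

   and they differ only in how the conditional bias is produced (store-flag,
   conditional move, sign mask, or an explicit branch).  The hypothetical
   helper assumes 32-bit int, 0 < logd < 32, and an arithmetic right shift of
   negative values.  */

static int
example_sdiv_pow2 (int x, int logd)
{
  /* Sign mask shifted down so it holds d - 1 when x is negative.  */
  int bias = (int) ((unsigned int) (x >> 31) >> (32 - logd));
  return (x + bias) >> logd;       /* == x / (1 << logd), truncating */
}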
4031 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4032 if that is convenient, and returning where the result is.
4033 You may request either the quotient or the remainder as the result;
4034 specify REM_FLAG nonzero to get the remainder.
4036 CODE is the expression code for which kind of division this is;
4037 it controls how rounding is done. MODE is the machine mode to use.
4038 UNSIGNEDP nonzero means do unsigned division. */
4040 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4041 and then correct it by or'ing in missing high bits
4042 if result of ANDI is nonzero.
4043 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4044 This could optimize to a bfexts instruction.
4045 But C doesn't use these operations, so their optimizations are
4046 left for later. */
4047 /* ??? For modulo, we don't actually need the highpart of the first product,
4048 the low part will do nicely. And for small divisors, the second multiply
4049 can also be a low-part only multiply or even be completely left out.
4050 E.g. to calculate the remainder of a division by 3 with a 32 bit
4051 multiply, multiply with 0x55555556 and extract the upper two bits;
4052 the result is exact for inputs up to 0x1fffffff.
4053 The input range can be reduced by using cross-sum rules.
4054 For odd divisors >= 3, the following table gives right shift counts
4055 so that if a number is shifted by an integer multiple of the given
4056 amount, the remainder stays the same:
4057 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4058 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4059 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4060 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4061 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4063 Cross-sum rules for even numbers can be derived by leaving as many bits
4064 to the right alone as the divisor has zeros to the right.
4065 E.g. if x is an unsigned 32 bit number:
4066 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
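
/* Illustrative sketch (not part of the original file): the remainder-by-3
   trick from the note above, for unsigned 32-bit x.  0x55555556 is 2^32 / 3
   rounded, so the low 32 bits of x * 0x55555556 are approximately
   (x mod 3) * 2^32 / 3, and their top two bits recover the remainder; as the
   note says, this is exact only for x <= 0x1fffffff.  Assumes a 32-bit
   unsigned int.  */

static unsigned int
example_rem3_by_mult (unsigned int x)
{
  return (x * 0x55555556u) >> 30;   /* == x % 3 for x <= 0x1fffffff */
}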
4070 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4071 rtx op0, rtx op1, rtx target, int unsignedp)
4073 machine_mode compute_mode;
4074 rtx tquotient;
4075 rtx quotient = 0, remainder = 0;
4076 rtx_insn *last;
4077 rtx_insn *insn;
4078 optab optab1, optab2;
4079 int op1_is_constant, op1_is_pow2 = 0;
4080 int max_cost, extra_cost;
4081 static HOST_WIDE_INT last_div_const = 0;
4082 bool speed = optimize_insn_for_speed_p ();
4084 op1_is_constant = CONST_INT_P (op1);
4085 if (op1_is_constant)
4087 wide_int ext_op1 = rtx_mode_t (op1, mode);
4088 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4089 || (! unsignedp
4090 && wi::popcount (wi::neg (ext_op1)) == 1));
4094 This is the structure of expand_divmod:
4096 First comes code to fix up the operands so we can perform the operations
4097 correctly and efficiently.
4099 Second comes a switch statement with code specific for each rounding mode.
4100 For some special operands this code emits all RTL for the desired
4101 operation, for other cases, it generates only a quotient and stores it in
4102 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4103 to indicate that it has not done anything.
4105 Last comes code that finishes the operation. If QUOTIENT is set and
4106 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4107 QUOTIENT is not set, it is computed using trunc rounding.
4109 We try to generate special code for division and remainder when OP1 is a
4110 constant. If |OP1| = 2**n we can use shifts and some other fast
4111 operations. For other values of OP1, we compute a carefully selected
4112 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4113 by m.
4115 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4116 half of the product. Different strategies for generating the product are
4117 implemented in expmed_mult_highpart.
4119 If what we actually want is the remainder, we generate that by another
4120 by-constant multiplication and a subtraction. */
4122 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4123 code below will malfunction if we are, so check here and handle
4124 the special case if so. */
4125 if (op1 == const1_rtx)
4126 return rem_flag ? const0_rtx : op0;
4128 /* When dividing by -1, we could get an overflow.
4129 negv_optab can handle overflows. */
4130 if (! unsignedp && op1 == constm1_rtx)
4132 if (rem_flag)
4133 return const0_rtx;
4134 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4135 ? negv_optab : neg_optab, op0, target, 0);
4138 if (target
4139 /* Don't use the function value register as a target
4140 since we have to read it as well as write it,
4141 and function-inlining gets confused by this. */
4142 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4143 /* Don't clobber an operand while doing a multi-step calculation. */
4144 || ((rem_flag || op1_is_constant)
4145 && (reg_mentioned_p (target, op0)
4146 || (MEM_P (op0) && MEM_P (target))))
4147 || reg_mentioned_p (target, op1)
4148 || (MEM_P (op1) && MEM_P (target))))
4149 target = 0;
4151 /* Get the mode in which to perform this computation. Normally it will
4152 be MODE, but sometimes we can't do the desired operation in MODE.
4153 If so, pick a wider mode in which we can do the operation. Convert
4154 to that mode at the start to avoid repeated conversions.
4156 First see what operations we need. These depend on the expression
4157 we are evaluating. (We assume that divxx3 insns exist under the
4158 same conditions as modxx3 insns do, and that these insns don't normally
4159 fail. If these assumptions are not correct, we may generate less
4160 efficient code in some cases.)
4162 Then see if we find a mode in which we can open-code that operation
4163 (either a division, modulus, or shift). Finally, check for the smallest
4164 mode for which we can do the operation with a library call. */
4166 /* We might want to refine this now that we have division-by-constant
4167 optimization. Since expmed_mult_highpart tries so many variants, it is
4168 not straightforward to generalize this. Maybe we should make an array
4169 of possible modes in init_expmed? Save this for GCC 2.7. */
4171 optab1 = (op1_is_pow2
4172 ? (unsignedp ? lshr_optab : ashr_optab)
4173 : (unsignedp ? udiv_optab : sdiv_optab));
4174 optab2 = (op1_is_pow2 ? optab1
4175 : (unsignedp ? udivmod_optab : sdivmod_optab));
4177 FOR_EACH_MODE_FROM (compute_mode, mode)
4178 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4179 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4180 break;
4182 if (compute_mode == VOIDmode)
4183 FOR_EACH_MODE_FROM (compute_mode, mode)
4184 if (optab_libfunc (optab1, compute_mode)
4185 || optab_libfunc (optab2, compute_mode))
4186 break;
4188 /* If we still couldn't find a mode, use MODE, but expand_binop will
4189 probably die. */
4190 if (compute_mode == VOIDmode)
4191 compute_mode = mode;
4193 if (target && GET_MODE (target) == compute_mode)
4194 tquotient = target;
4195 else
4196 tquotient = gen_reg_rtx (compute_mode);
4198 #if 0
4199 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4200 (mode), and thereby get better code when OP1 is a constant. Do that
4201 later. It will require going over all usages of SIZE below. */
4202 size = GET_MODE_BITSIZE (mode);
4203 #endif
4205 /* Only deduct something for a REM if the last divide done was
4206 for a different constant. Then set the constant of the last
4207 divide. */
4208 max_cost = (unsignedp
4209 ? udiv_cost (speed, compute_mode)
4210 : sdiv_cost (speed, compute_mode));
4211 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4212 && INTVAL (op1) == last_div_const))
4213 max_cost -= (mul_cost (speed, compute_mode)
4214 + add_cost (speed, compute_mode));
4216 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4218 /* Now convert to the best mode to use. */
4219 if (compute_mode != mode)
4221 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4222 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4224 /* convert_modes may have placed op1 into a register, so we
4225 must recompute the following. */
4226 op1_is_constant = CONST_INT_P (op1);
4227 if (op1_is_constant)
4229 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4230 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4231 || (! unsignedp
4232 && wi::popcount (wi::neg (ext_op1)) == 1));
4234 else
4235 op1_is_pow2 = 0;
4238 /* If one of the operands is a volatile MEM, copy it into a register. */
4240 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4241 op0 = force_reg (compute_mode, op0);
4242 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4243 op1 = force_reg (compute_mode, op1);
4245 /* If we need the remainder or if OP1 is constant, we need to
4246 put OP0 in a register in case it has any queued subexpressions. */
4247 if (rem_flag || op1_is_constant)
4248 op0 = force_reg (compute_mode, op0);
4250 last = get_last_insn ();
4252 /* Promote floor rounding to trunc rounding for unsigned operations. */
4253 if (unsignedp)
4255 if (code == FLOOR_DIV_EXPR)
4256 code = TRUNC_DIV_EXPR;
4257 if (code == FLOOR_MOD_EXPR)
4258 code = TRUNC_MOD_EXPR;
4259 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4260 code = TRUNC_DIV_EXPR;
4263 if (op1 != const0_rtx)
4264 switch (code)
4266 case TRUNC_MOD_EXPR:
4267 case TRUNC_DIV_EXPR:
4268 if (op1_is_constant)
4270 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4271 int size = GET_MODE_BITSIZE (int_mode);
4272 if (unsignedp)
4274 unsigned HOST_WIDE_INT mh, ml;
4275 int pre_shift, post_shift;
4276 int dummy;
4277 wide_int wd = rtx_mode_t (op1, int_mode);
4278 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4280 if (wi::popcount (wd) == 1)
4282 pre_shift = floor_log2 (d);
4283 if (rem_flag)
4285 unsigned HOST_WIDE_INT mask
4286 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4287 remainder
4288 = expand_binop (int_mode, and_optab, op0,
4289 gen_int_mode (mask, int_mode),
4290 remainder, 1,
4291 OPTAB_LIB_WIDEN);
4292 if (remainder)
4293 return gen_lowpart (mode, remainder);
4295 quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4296 pre_shift, tquotient, 1);
4298 else if (size <= HOST_BITS_PER_WIDE_INT)
4300 if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4302 /* Most significant bit of divisor is set; emit an scc
4303 insn. */
4304 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4305 int_mode, 1, 1);
4307 else
4309 /* Find a suitable multiplier and right shift count
4310 instead of multiplying with D. */
4312 mh = choose_multiplier (d, size, size,
4313 &ml, &post_shift, &dummy);
4315 /* If the suggested multiplier is more than SIZE bits,
4316 we can do better for even divisors, using an
4317 initial right shift. */
4318 if (mh != 0 && (d & 1) == 0)
4320 pre_shift = ctz_or_zero (d);
4321 mh = choose_multiplier (d >> pre_shift, size,
4322 size - pre_shift,
4323 &ml, &post_shift, &dummy);
4324 gcc_assert (!mh);
4326 else
4327 pre_shift = 0;
4329 if (mh != 0)
4331 rtx t1, t2, t3, t4;
4333 if (post_shift - 1 >= BITS_PER_WORD)
4334 goto fail1;
4336 extra_cost
4337 = (shift_cost (speed, int_mode, post_shift - 1)
4338 + shift_cost (speed, int_mode, 1)
4339 + 2 * add_cost (speed, int_mode));
4340 t1 = expmed_mult_highpart
4341 (int_mode, op0, gen_int_mode (ml, int_mode),
4342 NULL_RTX, 1, max_cost - extra_cost);
4343 if (t1 == 0)
4344 goto fail1;
4345 t2 = force_operand (gen_rtx_MINUS (int_mode,
4346 op0, t1),
4347 NULL_RTX);
4348 t3 = expand_shift (RSHIFT_EXPR, int_mode,
4349 t2, 1, NULL_RTX, 1);
4350 t4 = force_operand (gen_rtx_PLUS (int_mode,
4351 t1, t3),
4352 NULL_RTX);
4353 quotient = expand_shift
4354 (RSHIFT_EXPR, int_mode, t4,
4355 post_shift - 1, tquotient, 1);
4357 else
4359 rtx t1, t2;
4361 if (pre_shift >= BITS_PER_WORD
4362 || post_shift >= BITS_PER_WORD)
4363 goto fail1;
4365 t1 = expand_shift
4366 (RSHIFT_EXPR, int_mode, op0,
4367 pre_shift, NULL_RTX, 1);
4368 extra_cost
4369 = (shift_cost (speed, int_mode, pre_shift)
4370 + shift_cost (speed, int_mode, post_shift));
4371 t2 = expmed_mult_highpart
4372 (int_mode, t1,
4373 gen_int_mode (ml, int_mode),
4374 NULL_RTX, 1, max_cost - extra_cost);
4375 if (t2 == 0)
4376 goto fail1;
4377 quotient = expand_shift
4378 (RSHIFT_EXPR, int_mode, t2,
4379 post_shift, tquotient, 1);
4383 else /* Mode too wide to use the tricky code */
4384 break;
4386 insn = get_last_insn ();
4387 if (insn != last)
4388 set_dst_reg_note (insn, REG_EQUAL,
4389 gen_rtx_UDIV (int_mode, op0, op1),
4390 quotient);
4392 else /* TRUNC_DIV, signed */
4394 unsigned HOST_WIDE_INT ml;
4395 int lgup, post_shift;
4396 rtx mlr;
4397 HOST_WIDE_INT d = INTVAL (op1);
4398 unsigned HOST_WIDE_INT abs_d;
4400 /* Since d might be INT_MIN, we have to cast to
4401 unsigned HOST_WIDE_INT before negating to avoid
4402 undefined signed overflow. */
4403 abs_d = (d >= 0
4404 ? (unsigned HOST_WIDE_INT) d
4405 : - (unsigned HOST_WIDE_INT) d);
4407 /* n rem d = n rem -d */
4408 if (rem_flag && d < 0)
4410 d = abs_d;
4411 op1 = gen_int_mode (abs_d, int_mode);
4414 if (d == 1)
4415 quotient = op0;
4416 else if (d == -1)
4417 quotient = expand_unop (int_mode, neg_optab, op0,
4418 tquotient, 0);
4419 else if (size <= HOST_BITS_PER_WIDE_INT
4420 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4422 /* This case is not handled correctly below. */
4423 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4424 int_mode, 1, 1);
4425 if (quotient == 0)
4426 goto fail1;
4428 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4429 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4430 && (rem_flag
4431 ? smod_pow2_cheap (speed, int_mode)
4432 : sdiv_pow2_cheap (speed, int_mode))
4433 /* We assume that the cheap metric is true if the
4434 optab has an expander for this mode. */
4435 && ((optab_handler ((rem_flag ? smod_optab
4436 : sdiv_optab),
4437 int_mode)
4438 != CODE_FOR_nothing)
4439 || (optab_handler (sdivmod_optab, int_mode)
4440 != CODE_FOR_nothing)))
4442 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)
4443 && (size <= HOST_BITS_PER_WIDE_INT
4444 || abs_d != (unsigned HOST_WIDE_INT) d))
4446 if (rem_flag)
4448 remainder = expand_smod_pow2 (int_mode, op0, d);
4449 if (remainder)
4450 return gen_lowpart (mode, remainder);
4453 if (sdiv_pow2_cheap (speed, int_mode)
4454 && ((optab_handler (sdiv_optab, int_mode)
4455 != CODE_FOR_nothing)
4456 || (optab_handler (sdivmod_optab, int_mode)
4457 != CODE_FOR_nothing)))
4458 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4459 int_mode, op0,
4460 gen_int_mode (abs_d,
4461 int_mode),
4462 NULL_RTX, 0);
4463 else
4464 quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4466 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4467 negate the quotient. */
4468 if (d < 0)
4470 insn = get_last_insn ();
4471 if (insn != last
4472 && abs_d < (HOST_WIDE_INT_1U
4473 << (HOST_BITS_PER_WIDE_INT - 1)))
4474 set_dst_reg_note (insn, REG_EQUAL,
4475 gen_rtx_DIV (int_mode, op0,
4476 gen_int_mode
4477 (abs_d,
4478 int_mode)),
4479 quotient);
4481 quotient = expand_unop (int_mode, neg_optab,
4482 quotient, quotient, 0);
4485 else if (size <= HOST_BITS_PER_WIDE_INT)
4487 choose_multiplier (abs_d, size, size - 1,
4488 &ml, &post_shift, &lgup);
4489 if (ml < HOST_WIDE_INT_1U << (size - 1))
4491 rtx t1, t2, t3;
4493 if (post_shift >= BITS_PER_WORD
4494 || size - 1 >= BITS_PER_WORD)
4495 goto fail1;
4497 extra_cost = (shift_cost (speed, int_mode, post_shift)
4498 + shift_cost (speed, int_mode, size - 1)
4499 + add_cost (speed, int_mode));
4500 t1 = expmed_mult_highpart
4501 (int_mode, op0, gen_int_mode (ml, int_mode),
4502 NULL_RTX, 0, max_cost - extra_cost);
4503 if (t1 == 0)
4504 goto fail1;
4505 t2 = expand_shift
4506 (RSHIFT_EXPR, int_mode, t1,
4507 post_shift, NULL_RTX, 0);
4508 t3 = expand_shift
4509 (RSHIFT_EXPR, int_mode, op0,
4510 size - 1, NULL_RTX, 0);
4511 if (d < 0)
4512 quotient
4513 = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4514 tquotient);
4515 else
4516 quotient
4517 = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4518 tquotient);
4520 else
4522 rtx t1, t2, t3, t4;
4524 if (post_shift >= BITS_PER_WORD
4525 || size - 1 >= BITS_PER_WORD)
4526 goto fail1;
4528 ml |= HOST_WIDE_INT_M1U << (size - 1);
4529 mlr = gen_int_mode (ml, int_mode);
4530 extra_cost = (shift_cost (speed, int_mode, post_shift)
4531 + shift_cost (speed, int_mode, size - 1)
4532 + 2 * add_cost (speed, int_mode));
4533 t1 = expmed_mult_highpart (int_mode, op0, mlr,
4534 NULL_RTX, 0,
4535 max_cost - extra_cost);
4536 if (t1 == 0)
4537 goto fail1;
4538 t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4539 NULL_RTX);
4540 t3 = expand_shift
4541 (RSHIFT_EXPR, int_mode, t2,
4542 post_shift, NULL_RTX, 0);
4543 t4 = expand_shift
4544 (RSHIFT_EXPR, int_mode, op0,
4545 size - 1, NULL_RTX, 0);
4546 if (d < 0)
4547 quotient
4548 = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4549 tquotient);
4550 else
4551 quotient
4552 = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4553 tquotient);
4556 else /* Mode too wide to use the tricky code */
4557 break;
4559 insn = get_last_insn ();
4560 if (insn != last)
4561 set_dst_reg_note (insn, REG_EQUAL,
4562 gen_rtx_DIV (int_mode, op0, op1),
4563 quotient);
4565 break;
4567 fail1:
4568 delete_insns_since (last);
4569 break;
4571 case FLOOR_DIV_EXPR:
4572 case FLOOR_MOD_EXPR:
4573 /* We will come here only for signed operations. */
4574 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4576 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4577 int size = GET_MODE_BITSIZE (int_mode);
4578 unsigned HOST_WIDE_INT mh, ml;
4579 int pre_shift, lgup, post_shift;
4580 HOST_WIDE_INT d = INTVAL (op1);
4582 if (d > 0)
4584 /* We could just as easily deal with negative constants here,
4585 but it does not seem worth the trouble for GCC 2.6. */
4586 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4588 pre_shift = floor_log2 (d);
4589 if (rem_flag)
4591 unsigned HOST_WIDE_INT mask
4592 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4593 remainder = expand_binop
4594 (int_mode, and_optab, op0,
4595 gen_int_mode (mask, int_mode),
4596 remainder, 0, OPTAB_LIB_WIDEN);
4597 if (remainder)
4598 return gen_lowpart (mode, remainder);
4600 quotient = expand_shift
4601 (RSHIFT_EXPR, int_mode, op0,
4602 pre_shift, tquotient, 0);
4604 else
4606 rtx t1, t2, t3, t4;
4608 mh = choose_multiplier (d, size, size - 1,
4609 &ml, &post_shift, &lgup);
4610 gcc_assert (!mh);
4612 if (post_shift < BITS_PER_WORD
4613 && size - 1 < BITS_PER_WORD)
4615 t1 = expand_shift
4616 (RSHIFT_EXPR, int_mode, op0,
4617 size - 1, NULL_RTX, 0);
4618 t2 = expand_binop (int_mode, xor_optab, op0, t1,
4619 NULL_RTX, 0, OPTAB_WIDEN);
4620 extra_cost = (shift_cost (speed, int_mode, post_shift)
4621 + shift_cost (speed, int_mode, size - 1)
4622 + 2 * add_cost (speed, int_mode));
4623 t3 = expmed_mult_highpart
4624 (int_mode, t2, gen_int_mode (ml, int_mode),
4625 NULL_RTX, 1, max_cost - extra_cost);
4626 if (t3 != 0)
4628 t4 = expand_shift
4629 (RSHIFT_EXPR, int_mode, t3,
4630 post_shift, NULL_RTX, 1);
4631 quotient = expand_binop (int_mode, xor_optab,
4632 t4, t1, tquotient, 0,
4633 OPTAB_WIDEN);
4638 else
4640 rtx nsign, t1, t2, t3, t4;
4641 t1 = force_operand (gen_rtx_PLUS (int_mode,
4642 op0, constm1_rtx), NULL_RTX);
4643 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4644 0, OPTAB_WIDEN);
4645 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4646 size - 1, NULL_RTX, 0);
4647 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4648 NULL_RTX);
4649 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4650 NULL_RTX, 0);
4651 if (t4)
4653 rtx t5;
4654 t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4655 NULL_RTX, 0);
4656 quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4657 tquotient);
4662 if (quotient != 0)
4663 break;
4664 delete_insns_since (last);
4666 /* Try using an instruction that produces both the quotient and
4667 remainder, using truncation. We can easily compensate the quotient
4668 or remainder to get floor rounding, once we have the remainder.
4669 Notice that we compute also the final remainder value here,
4670 and return the result right away. */
4671 if (target == 0 || GET_MODE (target) != compute_mode)
4672 target = gen_reg_rtx (compute_mode);
4674 if (rem_flag)
4676 remainder
4677 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4678 quotient = gen_reg_rtx (compute_mode);
4680 else
4682 quotient
4683 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4684 remainder = gen_reg_rtx (compute_mode);
4687 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4688 quotient, remainder, 0))
4690 /* This could be computed with a branch-less sequence.
4691 Save that for later. */
4692 rtx tem;
4693 rtx_code_label *label = gen_label_rtx ();
4694 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4695 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4696 NULL_RTX, 0, OPTAB_WIDEN);
4697 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4698 expand_dec (quotient, const1_rtx);
4699 expand_inc (remainder, op1);
4700 emit_label (label);
4701 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4704 /* No luck with division elimination or divmod. Have to do it
4705 by conditionally adjusting op0 *and* the result. */
4707 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4708 rtx adjusted_op0;
4709 rtx tem;
4711 quotient = gen_reg_rtx (compute_mode);
4712 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4713 label1 = gen_label_rtx ();
4714 label2 = gen_label_rtx ();
4715 label3 = gen_label_rtx ();
4716 label4 = gen_label_rtx ();
4717 label5 = gen_label_rtx ();
4718 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4719 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4720 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4721 quotient, 0, OPTAB_LIB_WIDEN);
4722 if (tem != quotient)
4723 emit_move_insn (quotient, tem);
4724 emit_jump_insn (targetm.gen_jump (label5));
4725 emit_barrier ();
4726 emit_label (label1);
4727 expand_inc (adjusted_op0, const1_rtx);
4728 emit_jump_insn (targetm.gen_jump (label4));
4729 emit_barrier ();
4730 emit_label (label2);
4731 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4732 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4733 quotient, 0, OPTAB_LIB_WIDEN);
4734 if (tem != quotient)
4735 emit_move_insn (quotient, tem);
4736 emit_jump_insn (targetm.gen_jump (label5));
4737 emit_barrier ();
4738 emit_label (label3);
4739 expand_dec (adjusted_op0, const1_rtx);
4740 emit_label (label4);
4741 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4742 quotient, 0, OPTAB_LIB_WIDEN);
4743 if (tem != quotient)
4744 emit_move_insn (quotient, tem);
4745 expand_dec (quotient, const1_rtx);
4746 emit_label (label5);
4748 break;
4750 case CEIL_DIV_EXPR:
4751 case CEIL_MOD_EXPR:
4752 if (unsignedp)
4754 if (op1_is_constant
4755 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4756 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4757 || INTVAL (op1) >= 0))
4759 scalar_int_mode int_mode
4760 = as_a <scalar_int_mode> (compute_mode);
4761 rtx t1, t2, t3;
4762 unsigned HOST_WIDE_INT d = INTVAL (op1);
4763 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4764 floor_log2 (d), tquotient, 1);
4765 t2 = expand_binop (int_mode, and_optab, op0,
4766 gen_int_mode (d - 1, int_mode),
4767 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4768 t3 = gen_reg_rtx (int_mode);
4769 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4770 if (t3 == 0)
4772 rtx_code_label *lab;
4773 lab = gen_label_rtx ();
4774 do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4775 expand_inc (t1, const1_rtx);
4776 emit_label (lab);
4777 quotient = t1;
4779 else
4780 quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4781 tquotient);
4782 break;
4785 /* Try using an instruction that produces both the quotient and
4786 remainder, using truncation. We can easily compensate the
4787 quotient or remainder to get ceiling rounding, once we have the
4788 remainder. Notice that we compute also the final remainder
4789 value here, and return the result right away. */
4790 if (target == 0 || GET_MODE (target) != compute_mode)
4791 target = gen_reg_rtx (compute_mode);
4793 if (rem_flag)
4795 remainder = (REG_P (target)
4796 ? target : gen_reg_rtx (compute_mode));
4797 quotient = gen_reg_rtx (compute_mode);
4799 else
4801 quotient = (REG_P (target)
4802 ? target : gen_reg_rtx (compute_mode));
4803 remainder = gen_reg_rtx (compute_mode);
4806 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4807 remainder, 1))
4809 /* This could be computed with a branch-less sequence.
4810 Save that for later. */
4811 rtx_code_label *label = gen_label_rtx ();
4812 do_cmp_and_jump (remainder, const0_rtx, EQ,
4813 compute_mode, label);
4814 expand_inc (quotient, const1_rtx);
4815 expand_dec (remainder, op1);
4816 emit_label (label);
4817 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4820 /* No luck with division elimination or divmod. Have to do it
4821 by conditionally adjusting op0 *and* the result. */
4823 rtx_code_label *label1, *label2;
4824 rtx adjusted_op0, tem;
4826 quotient = gen_reg_rtx (compute_mode);
4827 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4828 label1 = gen_label_rtx ();
4829 label2 = gen_label_rtx ();
4830 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4831 compute_mode, label1);
4832 emit_move_insn (quotient, const0_rtx);
4833 emit_jump_insn (targetm.gen_jump (label2));
4834 emit_barrier ();
4835 emit_label (label1);
4836 expand_dec (adjusted_op0, const1_rtx);
4837 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4838 quotient, 1, OPTAB_LIB_WIDEN);
4839 if (tem != quotient)
4840 emit_move_insn (quotient, tem);
4841 expand_inc (quotient, const1_rtx);
4842 emit_label (label2);
4845 else /* signed */
4847 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4848 && INTVAL (op1) >= 0)
4850 /* This is extremely similar to the code for the unsigned case
4851 above. For 2.7 we should merge these variants, but for
4852 2.6.1 I don't want to touch the code for unsigned since that
4853 gets used in C. The signed case will only be used by other
4854 languages (Ada). */
4856 rtx t1, t2, t3;
4857 unsigned HOST_WIDE_INT d = INTVAL (op1);
4858 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4859 floor_log2 (d), tquotient, 0);
4860 t2 = expand_binop (compute_mode, and_optab, op0,
4861 gen_int_mode (d - 1, compute_mode),
4862 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4863 t3 = gen_reg_rtx (compute_mode);
4864 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4865 compute_mode, 1, 1);
4866 if (t3 == 0)
4868 rtx_code_label *lab;
4869 lab = gen_label_rtx ();
4870 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4871 expand_inc (t1, const1_rtx);
4872 emit_label (lab);
4873 quotient = t1;
4875 else
4876 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4877 t1, t3),
4878 tquotient);
4879 break;
4882 /* Try using an instruction that produces both the quotient and
4883 remainder, using truncation. We can easily compensate the
4884 quotient or remainder to get ceiling rounding, once we have the
4885 remainder. Notice that we compute also the final remainder
4886 value here, and return the result right away. */
4887 if (target == 0 || GET_MODE (target) != compute_mode)
4888 target = gen_reg_rtx (compute_mode);
4889 if (rem_flag)
4891 remainder = (REG_P (target)
4892 ? target : gen_reg_rtx (compute_mode));
4893 quotient = gen_reg_rtx (compute_mode);
4895 else
4897 quotient = (REG_P (target)
4898 ? target : gen_reg_rtx (compute_mode));
4899 remainder = gen_reg_rtx (compute_mode);
4902 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4903 remainder, 0))
4905 /* This could be computed with a branch-less sequence.
4906 Save that for later. */
4907 rtx tem;
4908 rtx_code_label *label = gen_label_rtx ();
4909 do_cmp_and_jump (remainder, const0_rtx, EQ,
4910 compute_mode, label);
4911 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4912 NULL_RTX, 0, OPTAB_WIDEN);
4913 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4914 expand_inc (quotient, const1_rtx);
4915 expand_dec (remainder, op1);
4916 emit_label (label);
4917 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4920 /* No luck with division elimination or divmod. Have to do it
4921 by conditionally adjusting op0 *and* the result. */
4923 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4924 rtx adjusted_op0;
4925 rtx tem;
4927 quotient = gen_reg_rtx (compute_mode);
4928 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4929 label1 = gen_label_rtx ();
4930 label2 = gen_label_rtx ();
4931 label3 = gen_label_rtx ();
4932 label4 = gen_label_rtx ();
4933 label5 = gen_label_rtx ();
4934 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4935 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4936 compute_mode, label1);
4937 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4938 quotient, 0, OPTAB_LIB_WIDEN);
4939 if (tem != quotient)
4940 emit_move_insn (quotient, tem);
4941 emit_jump_insn (targetm.gen_jump (label5));
4942 emit_barrier ();
4943 emit_label (label1);
4944 expand_dec (adjusted_op0, const1_rtx);
4945 emit_jump_insn (targetm.gen_jump (label4));
4946 emit_barrier ();
4947 emit_label (label2);
4948 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4949 compute_mode, label3);
4950 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4951 quotient, 0, OPTAB_LIB_WIDEN);
4952 if (tem != quotient)
4953 emit_move_insn (quotient, tem);
4954 emit_jump_insn (targetm.gen_jump (label5));
4955 emit_barrier ();
4956 emit_label (label3);
4957 expand_inc (adjusted_op0, const1_rtx);
4958 emit_label (label4);
4959 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4960 quotient, 0, OPTAB_LIB_WIDEN);
4961 if (tem != quotient)
4962 emit_move_insn (quotient, tem);
4963 expand_inc (quotient, const1_rtx);
4964 emit_label (label5);
4967 break;
4969 case EXACT_DIV_EXPR:
4970 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4972 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4973 int size = GET_MODE_BITSIZE (int_mode);
4974 HOST_WIDE_INT d = INTVAL (op1);
4975 unsigned HOST_WIDE_INT ml;
4976 int pre_shift;
4977 rtx t1;
4979 pre_shift = ctz_or_zero (d);
4980 ml = invert_mod2n (d >> pre_shift, size);
4981 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4982 pre_shift, NULL_RTX, unsignedp);
4983 quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
4984 NULL_RTX, 1);
4986 insn = get_last_insn ();
4987 set_dst_reg_note (insn, REG_EQUAL,
4988 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4989 int_mode, op0, op1),
4990 quotient);
4992 break;
4994 case ROUND_DIV_EXPR:
4995 case ROUND_MOD_EXPR:
4996 if (unsignedp)
4998 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4999 rtx tem;
5000 rtx_code_label *label;
5001 label = gen_label_rtx ();
5002 quotient = gen_reg_rtx (int_mode);
5003 remainder = gen_reg_rtx (int_mode);
5004 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5006 rtx tem;
5007 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5008 quotient, 1, OPTAB_LIB_WIDEN);
5009 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5010 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5011 remainder, 1, OPTAB_LIB_WIDEN);
5013 tem = plus_constant (int_mode, op1, -1);
5014 tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5015 do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5016 expand_inc (quotient, const1_rtx);
5017 expand_dec (remainder, op1);
5018 emit_label (label);
5020 else
5022 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5023 int size = GET_MODE_BITSIZE (int_mode);
5024 rtx abs_rem, abs_op1, tem, mask;
5025 rtx_code_label *label;
5026 label = gen_label_rtx ();
5027 quotient = gen_reg_rtx (int_mode);
5028 remainder = gen_reg_rtx (int_mode);
5029 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5031 rtx tem;
5032 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5033 quotient, 0, OPTAB_LIB_WIDEN);
5034 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5035 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5036 remainder, 0, OPTAB_LIB_WIDEN);
5038 abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5039 abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5040 tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5041 1, NULL_RTX, 1);
5042 do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5043 tem = expand_binop (int_mode, xor_optab, op0, op1,
5044 NULL_RTX, 0, OPTAB_WIDEN);
5045 mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5046 size - 1, NULL_RTX, 0);
5047 tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5048 NULL_RTX, 0, OPTAB_WIDEN);
5049 tem = expand_binop (int_mode, sub_optab, tem, mask,
5050 NULL_RTX, 0, OPTAB_WIDEN);
5051 expand_inc (quotient, tem);
5052 tem = expand_binop (int_mode, xor_optab, mask, op1,
5053 NULL_RTX, 0, OPTAB_WIDEN);
5054 tem = expand_binop (int_mode, sub_optab, tem, mask,
5055 NULL_RTX, 0, OPTAB_WIDEN);
5056 expand_dec (remainder, tem);
5057 emit_label (label);
5059 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5061 default:
5062 gcc_unreachable ();
5065 if (quotient == 0)
5067 if (target && GET_MODE (target) != compute_mode)
5068 target = 0;
5070 if (rem_flag)
5072 /* Try to produce the remainder without producing the quotient.
5073 If we seem to have a divmod pattern that does not require widening,
5074 don't try widening here. We should really have a WIDEN argument
5075 to expand_twoval_binop, since what we'd really like to do here is
5076 1) try a mod insn in compute_mode
5077 2) try a divmod insn in compute_mode
5078 3) try a div insn in compute_mode and multiply-subtract to get
5079 remainder
5080 4) try the same things with widening allowed. */
5081 remainder
5082 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5083 op0, op1, target,
5084 unsignedp,
5085 ((optab_handler (optab2, compute_mode)
5086 != CODE_FOR_nothing)
5087 ? OPTAB_DIRECT : OPTAB_WIDEN));
5088 if (remainder == 0)
5090 /* No luck there. Can we do remainder and divide at once
5091 without a library call? */
5092 remainder = gen_reg_rtx (compute_mode);
5093 if (! expand_twoval_binop ((unsignedp
5094 ? udivmod_optab
5095 : sdivmod_optab),
5096 op0, op1,
5097 NULL_RTX, remainder, unsignedp))
5098 remainder = 0;
5101 if (remainder)
5102 return gen_lowpart (mode, remainder);
5105 /* Produce the quotient. Try a quotient insn, but not a library call.
5106 If we have a divmod in this mode, use it in preference to widening
5107 the div (for this test we assume it will not fail). Note that optab2
5108 is set to the one of the two optabs that the call below will use. */
5109 quotient
5110 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5111 op0, op1, rem_flag ? NULL_RTX : target,
5112 unsignedp,
5113 ((optab_handler (optab2, compute_mode)
5114 != CODE_FOR_nothing)
5115 ? OPTAB_DIRECT : OPTAB_WIDEN));
5117 if (quotient == 0)
5119 /* No luck there. Try a quotient-and-remainder insn,
5120 keeping the quotient alone. */
5121 quotient = gen_reg_rtx (compute_mode);
5122 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5123 op0, op1,
5124 quotient, NULL_RTX, unsignedp))
5126 quotient = 0;
5127 if (! rem_flag)
5128 /* Still no luck. If we are not computing the remainder,
5129 use a library call for the quotient. */
5130 quotient = sign_expand_binop (compute_mode,
5131 udiv_optab, sdiv_optab,
5132 op0, op1, target,
5133 unsignedp, OPTAB_LIB_WIDEN);
5138 if (rem_flag)
5140 if (target && GET_MODE (target) != compute_mode)
5141 target = 0;
5143 if (quotient == 0)
5145 /* No divide instruction either. Use library for remainder. */
5146 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5147 op0, op1, target,
5148 unsignedp, OPTAB_LIB_WIDEN);
5149 /* No remainder function. Try a quotient-and-remainder
5150 function, keeping the remainder. */
5151 if (!remainder)
5153 remainder = gen_reg_rtx (compute_mode);
5154 if (!expand_twoval_binop_libfunc
5155 (unsignedp ? udivmod_optab : sdivmod_optab,
5156 op0, op1,
5157 NULL_RTX, remainder,
5158 unsignedp ? UMOD : MOD))
5159 remainder = NULL_RTX;
5162 else
5164 /* We divided. Now finish doing X - Y * (X / Y). */
5165 remainder = expand_mult (compute_mode, quotient, op1,
5166 NULL_RTX, unsignedp);
5167 remainder = expand_binop (compute_mode, sub_optab, op0,
5168 remainder, target, unsignedp,
5169 OPTAB_LIB_WIDEN);
5173 return gen_lowpart (mode, rem_flag ? remainder : quotient);
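
/* Illustrative sketch (not part of the original file): when none of the
   special cases above apply, the FLOOR_, CEIL_ and ROUND_ variants fall back
   to a truncating divmod followed by a conditional fix-up of quotient and
   remainder.  The hypothetical helper below shows the equivalent fix-ups for
   32-bit int operands (d != 0, and ignoring the INT_MIN / -1 overflow
   case).  */

static void
example_rounding_fixups (int n, int d, int *floor_q, int *ceil_q, int *round_q)
{
  int q = n / d, r = n % d;                    /* truncating divmod */
  unsigned int abs_r = r < 0 ? -(unsigned int) r : (unsigned int) r;
  unsigned int abs_d = d < 0 ? -(unsigned int) d : (unsigned int) d;

  /* Floor: step towards -infinity when r != 0 and the signs differ.  */
  *floor_q = q - (r != 0 && (n < 0) != (d < 0));

  /* Ceiling: step towards +infinity when r != 0 and the signs agree.  */
  *ceil_q = q + (r != 0 && (n < 0) == (d < 0));

  /* Round to nearest, ties away from zero: adjust when 2*|r| >= |d|.  */
  if (2 * abs_r >= abs_d)
    *round_q = q + ((n < 0) == (d < 0) ? 1 : -1);
  else
    *round_q = q;
}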
5176 /* Return a tree node with data type TYPE, describing the value of X.
5177 Usually this is a VAR_DECL, if there is no obvious better choice.
5178 X may be an expression; however, we only support those expressions
5179 generated by loop.c. */
5181 tree
5182 make_tree (tree type, rtx x)
5184 tree t;
5186 switch (GET_CODE (x))
5188 case CONST_INT:
5189 case CONST_WIDE_INT:
5190 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5191 return t;
5193 case CONST_DOUBLE:
5194 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5195 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5196 t = wide_int_to_tree (type,
5197 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5198 HOST_BITS_PER_WIDE_INT * 2));
5199 else
5200 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5202 return t;
5204 case CONST_VECTOR:
5206 int units = CONST_VECTOR_NUNITS (x);
5207 tree itype = TREE_TYPE (type);
5208 tree *elts;
5209 int i;
5211 /* Build a tree with vector elements. */
5212 elts = XALLOCAVEC (tree, units);
5213 for (i = units - 1; i >= 0; --i)
5215 rtx elt = CONST_VECTOR_ELT (x, i);
5216 elts[i] = make_tree (itype, elt);
5219 return build_vector (type, elts);
5222 case PLUS:
5223 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5224 make_tree (type, XEXP (x, 1)));
5226 case MINUS:
5227 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5228 make_tree (type, XEXP (x, 1)));
5230 case NEG:
5231 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5233 case MULT:
5234 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5235 make_tree (type, XEXP (x, 1)));
5237 case ASHIFT:
5238 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5239 make_tree (type, XEXP (x, 1)));
5241 case LSHIFTRT:
5242 t = unsigned_type_for (type);
5243 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5244 make_tree (t, XEXP (x, 0)),
5245 make_tree (type, XEXP (x, 1))));
5247 case ASHIFTRT:
5248 t = signed_type_for (type);
5249 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5250 make_tree (t, XEXP (x, 0)),
5251 make_tree (type, XEXP (x, 1))));
5253 case DIV:
5254 if (TREE_CODE (type) != REAL_TYPE)
5255 t = signed_type_for (type);
5256 else
5257 t = type;
5259 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5260 make_tree (t, XEXP (x, 0)),
5261 make_tree (t, XEXP (x, 1))));
5262 case UDIV:
5263 t = unsigned_type_for (type);
5264 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5265 make_tree (t, XEXP (x, 0)),
5266 make_tree (t, XEXP (x, 1))));
5268 case SIGN_EXTEND:
5269 case ZERO_EXTEND:
5270 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5271 GET_CODE (x) == ZERO_EXTEND);
5272 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5274 case CONST:
5275 return make_tree (type, XEXP (x, 0));
5277 case SYMBOL_REF:
5278 t = SYMBOL_REF_DECL (x);
5279 if (t)
5280 return fold_convert (type, build_fold_addr_expr (t));
5281 /* fall through. */
5283 default:
5284 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5286 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5287 address mode to pointer mode. */
5288 if (POINTER_TYPE_P (type))
5289 x = convert_memory_address_addr_space
5290 (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5292 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5293 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5294 t->decl_with_rtl.rtl = x;
5296 return t;
5300 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5301 and returning TARGET.
5303 If TARGET is 0, a pseudo-register or constant is returned. */
5306 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5308 rtx tem = 0;
5310 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5311 tem = simplify_binary_operation (AND, mode, op0, op1);
5312 if (tem == 0)
5313 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5315 if (target == 0)
5316 target = tem;
5317 else if (tem != target)
5318 emit_move_insn (target, tem);
5319 return target;
5322 /* Helper function for emit_store_flag. */
5324 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5325 machine_mode mode, machine_mode compare_mode,
5326 int unsignedp, rtx x, rtx y, int normalizep,
5327 machine_mode target_mode)
5329 struct expand_operand ops[4];
5330 rtx op0, comparison, subtarget;
5331 rtx_insn *last;
5332 scalar_int_mode result_mode = targetm.cstore_mode (icode);
5333 scalar_int_mode int_target_mode;
5335 last = get_last_insn ();
5336 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5337 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5338 if (!x || !y)
5340 delete_insns_since (last);
5341 return NULL_RTX;
5344 if (target_mode == VOIDmode)
5345 int_target_mode = result_mode;
5346 else
5347 int_target_mode = as_a <scalar_int_mode> (target_mode);
5348 if (!target)
5349 target = gen_reg_rtx (int_target_mode);
5351 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5353 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5354 create_fixed_operand (&ops[1], comparison);
5355 create_fixed_operand (&ops[2], x);
5356 create_fixed_operand (&ops[3], y);
5357 if (!maybe_expand_insn (icode, 4, ops))
5359 delete_insns_since (last);
5360 return NULL_RTX;
5362 subtarget = ops[0].value;
5364 /* If we are converting to a wider mode, first convert to
5365 INT_TARGET_MODE, then normalize. This produces better combining
5366 opportunities on machines that have a SIGN_EXTRACT when we are
5367 testing a single bit. This mostly benefits the 68k.
5369 If STORE_FLAG_VALUE does not have the sign bit set when
5370 interpreted in MODE, we can do this conversion as unsigned, which
5371 is usually more efficient. */
5372 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (result_mode))
5374 convert_move (target, subtarget,
5375 val_signbit_known_clear_p (result_mode,
5376 STORE_FLAG_VALUE));
5377 op0 = target;
5378 result_mode = int_target_mode;
5380 else
5381 op0 = subtarget;
5383 /* If we want to keep subexpressions around, don't reuse our last
5384 target. */
5385 if (optimize)
5386 subtarget = 0;
5388 /* Now normalize to the proper value in MODE. Sometimes we don't
5389 have to do anything. */
5390 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5392 /* STORE_FLAG_VALUE might be the most negative number, so write
5393 the comparison this way to avoid a compile-time warning. */
5394 else if (- normalizep == STORE_FLAG_VALUE)
5395 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5397 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5398 it hard to use a value of just the sign bit due to ANSI integer
5399 constant typing rules. */
5400 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5401 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5402 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5403 normalizep == 1);
5404 else
5406 gcc_assert (STORE_FLAG_VALUE & 1);
5408 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5409 if (normalizep == -1)
5410 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
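 /* Example of the normalization above (illustrative): if STORE_FLAG_VALUE
    is -1 and NORMALIZEP is 1, the negation branch turns the raw -1/0
    result of the cstore pattern into the requested 1/0 value.  */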
5413 /* If we were converting to a smaller mode, do the conversion now. */
5414 if (int_target_mode != result_mode)
5416 convert_move (target, op0, 0);
5417 return target;
5419 else
5420 return op0;
5424 /* A subroutine of emit_store_flag only including "tricks" that do not
5425 need a recursive call. These are kept separate to avoid infinite
5426 loops. */
5428 static rtx
5429 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5430 machine_mode mode, int unsignedp, int normalizep,
5431 machine_mode target_mode)
5433 rtx subtarget;
5434 enum insn_code icode;
5435 machine_mode compare_mode;
5436 enum mode_class mclass;
5437 enum rtx_code scode;
5439 if (unsignedp)
5440 code = unsigned_condition (code);
5441 scode = swap_condition (code);
5443 /* If one operand is constant, make it the second one. Only do this
5444 if the other operand is not constant as well. */
5446 if (swap_commutative_operands_p (op0, op1))
5448 std::swap (op0, op1);
5449 code = swap_condition (code);
5452 if (mode == VOIDmode)
5453 mode = GET_MODE (op0);
5455 /* For some comparisons with 1 and -1, we can convert this to
5456 comparisons with zero. This will often produce more opportunities for
5457 store-flag insns. */
5459 switch (code)
5461 case LT:
5462 if (op1 == const1_rtx)
5463 op1 = const0_rtx, code = LE;
5464 break;
5465 case LE:
5466 if (op1 == constm1_rtx)
5467 op1 = const0_rtx, code = LT;
5468 break;
5469 case GE:
5470 if (op1 == const1_rtx)
5471 op1 = const0_rtx, code = GT;
5472 break;
5473 case GT:
5474 if (op1 == constm1_rtx)
5475 op1 = const0_rtx, code = GE;
5476 break;
5477 case GEU:
5478 if (op1 == const1_rtx)
5479 op1 = const0_rtx, code = NE;
5480 break;
5481 case LTU:
5482 if (op1 == const1_rtx)
5483 op1 = const0_rtx, code = EQ;
5484 break;
5485 default:
5486 break;
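 /* For instance, "X < 1" is the same as "X <= 0", and an unsigned
    "X >= 1" is the same as "X != 0"; the rewrites above expose the
    zero comparisons that store-flag insns handle best (illustrative
    restatement of the switch above).  */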
5489 /* If we are comparing a double-word integer with zero or -1, we can
5490 convert the comparison into one involving a single word. */
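 /* Illustrative example, assuming 32-bit words: a 64-bit X == 0 becomes
    (lowpart | highpart) == 0, X == -1 becomes (lowpart & highpart) == -1,
    and the signed test X < 0 only needs to look at the high word.  */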
5491 scalar_int_mode int_mode;
5492 if (is_int_mode (mode, &int_mode)
5493 && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5494 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5496 rtx tem;
5497 if ((code == EQ || code == NE)
5498 && (op1 == const0_rtx || op1 == constm1_rtx))
5500 rtx op00, op01;
5502 /* Do a logical OR or AND of the two words and compare the
5503 result. */
5504 op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5505 op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5506 tem = expand_binop (word_mode,
5507 op1 == const0_rtx ? ior_optab : and_optab,
5508 op00, op01, NULL_RTX, unsignedp,
5509 OPTAB_DIRECT);
5511 if (tem != 0)
5512 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5513 unsignedp, normalizep);
5515 else if ((code == LT || code == GE) && op1 == const0_rtx)
5517 rtx op0h;
5519 /* If testing the sign bit, can just test on high word. */
5520 op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5521 subreg_highpart_offset (word_mode,
5522 int_mode));
5523 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5524 unsignedp, normalizep);
5526 else
5527 tem = NULL_RTX;
5529 if (tem)
5531 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5532 return tem;
5533 if (!target)
5534 target = gen_reg_rtx (target_mode);
5536 convert_move (target, tem,
5537 !val_signbit_known_set_p (word_mode,
5538 (normalizep ? normalizep
5539 : STORE_FLAG_VALUE)));
5540 return target;
5544 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5545 complement of A (for GE) and shifting the sign bit to the low bit. */
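 /* Illustrative example for a 32-bit mode with a 0/1 result:
    "A < 0" becomes the logical shift A >> 31, and "A >= 0" becomes
    (~A) >> 31; for a -1/0 result an arithmetic shift is used instead.  */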
5546 if (op1 == const0_rtx && (code == LT || code == GE)
5547 && is_int_mode (mode, &int_mode)
5548 && (normalizep || STORE_FLAG_VALUE == 1
5549 || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5551 scalar_int_mode int_target_mode;
5552 subtarget = target;
5554 if (!target)
5555 int_target_mode = int_mode;
5556 else
5558 /* If the result is to be wider than OP0, it is best to convert it
5559 first. If it is to be narrower, it is *incorrect* to convert it
5560 first. */
5561 int_target_mode = as_a <scalar_int_mode> (target_mode);
5562 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5564 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5565 int_mode = int_target_mode;
5569 if (int_target_mode != int_mode)
5570 subtarget = 0;
5572 if (code == GE)
5573 op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5574 ((STORE_FLAG_VALUE == 1 || normalizep)
5575 ? 0 : subtarget), 0);
5577 if (STORE_FLAG_VALUE == 1 || normalizep)
5578 /* If we are supposed to produce a 0/1 value, we want to do
5579 a logical shift from the sign bit to the low-order bit; for
5580 a -1/0 value, we do an arithmetic shift. */
5581 op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5582 GET_MODE_BITSIZE (int_mode) - 1,
5583 subtarget, normalizep != -1);
5585 if (int_mode != int_target_mode)
5586 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5588 return op0;
5591 mclass = GET_MODE_CLASS (mode);
5592 FOR_EACH_MODE_FROM (compare_mode, mode)
5594 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5595 icode = optab_handler (cstore_optab, optab_mode);
5596 if (icode != CODE_FOR_nothing)
5598 do_pending_stack_adjust ();
5599 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5600 unsignedp, op0, op1, normalizep, target_mode);
5601 if (tem)
5602 return tem;
5604 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5606 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5607 unsignedp, op1, op0, normalizep, target_mode);
5608 if (tem)
5609 return tem;
5611 break;
5615 return 0;
5618 /* Subroutine of emit_store_flag that handles cases in which the operands
5619 are scalar integers. SUBTARGET is the target to use for temporary
5620 operations and TRUEVAL is the value to store when the condition is
5621 true. All other arguments are as for emit_store_flag. */
5624 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5625 rtx op1, scalar_int_mode mode, int unsignedp,
5626 int normalizep, rtx trueval)
5628 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5629 rtx_insn *last = get_last_insn ();
5630 rtx tem;
5632 /* If this is an equality comparison of integers, we can try to exclusive-or
5633 (or subtract) the two operands and use a recursive call to try the
5634 comparison with zero. Don't do any of these cases if branches are
5635 very cheap. */
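 /* For example (illustrative), "X == Y" is recast as "(X ^ Y) == 0"
    (or "(X - Y) == 0" when no XOR pattern is available), and the
    comparison with zero is then retried recursively.  */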
5637 if ((code == EQ || code == NE) && op1 != const0_rtx)
5639 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5640 OPTAB_WIDEN);
5642 if (tem == 0)
5643 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5644 OPTAB_WIDEN);
5645 if (tem != 0)
5646 tem = emit_store_flag (target, code, tem, const0_rtx,
5647 mode, unsignedp, normalizep);
5648 if (tem != 0)
5649 return tem;
5651 delete_insns_since (last);
5654 /* For integer comparisons, try the reverse comparison. However, for
5655 small X, if we would have to extend it anyway, implementing "X != 0"
5656 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5657 rtx_code rcode = reverse_condition (code);
5658 if (can_compare_p (rcode, mode, ccp_store_flag)
5659 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5660 && code == NE
5661 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5662 && op1 == const0_rtx))
5664 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5665 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5667 /* Again, for the reverse comparison, use either an addition or a XOR. */
5668 if (want_add
5669 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5670 optimize_insn_for_speed_p ()) == 0)
5672 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5673 STORE_FLAG_VALUE, target_mode);
5674 if (tem != 0)
5675 tem = expand_binop (target_mode, add_optab, tem,
5676 gen_int_mode (normalizep, target_mode),
5677 target, 0, OPTAB_WIDEN);
5679 else if (!want_add
5680 && rtx_cost (trueval, mode, XOR, 1,
5681 optimize_insn_for_speed_p ()) == 0)
5683 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5684 normalizep, target_mode);
5685 if (tem != 0)
5686 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5687 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5690 if (tem != 0)
5691 return tem;
5692 delete_insns_since (last);
5695 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5696 the constant zero. Reject all other comparisons at this point. Only
5697 do LE and GT if branches are expensive since they are expensive on
5698 2-operand machines. */
5700 if (op1 != const0_rtx
5701 || (code != EQ && code != NE
5702 && (BRANCH_COST (optimize_insn_for_speed_p (),
5703 false) <= 1 || (code != LE && code != GT))))
5704 return 0;
5706 /* Try to put the result of the comparison in the sign bit. Assume we can't
5707 do the necessary operation below. */
5709 tem = 0;
5711 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5712 the sign bit set. */
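 /* Worked example (illustrative): for A = 5, 5 | 4 = 5 leaves the sign
    bit clear; for A = 0, 0 | -1 = -1 sets it; for A = -3, -3 | -4 = -3
    keeps it set.  So the sign bit of (A | (A - 1)) is set exactly when
    A <= 0.  */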
5714 if (code == LE)
5716 /* This is destructive, so SUBTARGET can't be OP0. */
5717 if (rtx_equal_p (subtarget, op0))
5718 subtarget = 0;
5720 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5721 OPTAB_WIDEN);
5722 if (tem)
5723 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5724 OPTAB_WIDEN);
5727 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5728 number of bits in the mode of OP0, minus one. */
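 /* Worked example for a 32-bit mode (illustrative): for A = 5,
    (5 >> 31) - 5 = -5 has the sign bit set; for A = 0 the result is 0;
    for A = -3, (-3 >> 31) - (-3) = -1 + 3 = 2 leaves it clear.  So the
    sign bit is set exactly when A > 0.  */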
5730 if (code == GT)
5732 if (rtx_equal_p (subtarget, op0))
5733 subtarget = 0;
5735 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5736 GET_MODE_BITSIZE (mode) - 1,
5737 subtarget, 0);
5738 if (tem)
5739 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5740 OPTAB_WIDEN);
5743 if (code == EQ || code == NE)
5745 /* For EQ or NE, one way to do the comparison is to apply an operation
5746 that converts the operand into a positive number if it is nonzero
5747 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5748 for NE we negate. This puts the result in the sign bit. Then we
5749 normalize with a shift, if needed.
5751 Two operations that can do the above actions are ABS and FFS, so try
5752 them. If that doesn't work, and MODE is smaller than a full word,
5753 we can use zero-extension to the wider mode (an unsigned conversion)
5754 as the operation. */
5756 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5757 that is compensated by the subsequent overflow when subtracting
5758 one / negating. */
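 /* Illustrative example using ABS: abs (A) - 1 has the sign bit set only
    when A == 0 (for A == INT_MIN the two overflows cancel out), while
    -abs (A) has it set only when A != 0; the shift at the end of the
    function then normalizes the sign bit if requested.  */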
5760 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5761 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5762 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5763 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5764 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5766 tem = convert_modes (word_mode, mode, op0, 1);
5767 mode = word_mode;
5770 if (tem != 0)
5772 if (code == EQ)
5773 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5774 0, OPTAB_WIDEN);
5775 else
5776 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5779 /* If we couldn't do it that way, for NE we can "or" the two's complement
5780 of the value with itself. For EQ, we take the one's complement of
5781 that "or", which is an extra insn, so we only handle EQ if branches
5782 are expensive. */
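 /* Illustrative example: for A = 5, -5 | 5 has the sign bit set, while
    for A = 0 the result is 0; so the sign bit of (-A) | A is set exactly
    when A != 0, and its complement gives the EQ test.  */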
5784 if (tem == 0
5785 && (code == NE
5786 || BRANCH_COST (optimize_insn_for_speed_p (),
5787 false) > 1))
5789 if (rtx_equal_p (subtarget, op0))
5790 subtarget = 0;
5792 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5793 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5794 OPTAB_WIDEN);
5796 if (tem && code == EQ)
5797 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5801 if (tem && normalizep)
5802 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5803 GET_MODE_BITSIZE (mode) - 1,
5804 subtarget, normalizep == 1);
5806 if (tem)
5808 if (!target)
5810 else if (GET_MODE (tem) != target_mode)
5812 convert_move (target, tem, 0);
5813 tem = target;
5815 else if (!subtarget)
5817 emit_move_insn (target, tem);
5818 tem = target;
5821 else
5822 delete_insns_since (last);
5824 return tem;
5827 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5828 and storing in TARGET. Normally return TARGET.
5829 Return 0 if that cannot be done.
5831 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5832 it is VOIDmode, they cannot both be CONST_INT.
5834 UNSIGNEDP is for the case where we have to widen the operands
5835 to perform the operation. It says to use zero-extension.
5837 NORMALIZEP is 1 if we should convert the result to be either zero
5838 or one. NORMALIZEP is -1 if we should convert the result to be
5839 either zero or -1. If NORMALIZEP is zero, the result will be left
5840 "raw" out of the scc insn. */
5843 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5844 machine_mode mode, int unsignedp, int normalizep)
5846 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5847 enum rtx_code rcode;
5848 rtx subtarget;
5849 rtx tem, trueval;
5850 rtx_insn *last;
5852 /* If we compare constants, we shouldn't use a store-flag operation,
5853 but a constant load. We can get there via the vanilla route that
5854 usually generates a compare-branch sequence, but will in this case
5855 fold the comparison to a constant, and thus elide the branch. */
5856 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5857 return NULL_RTX;
5859 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5860 target_mode);
5861 if (tem)
5862 return tem;
5864 /* If we reached here, we can't do this with a scc insn, however there
5865 are some comparisons that can be done in other ways. Don't do any
5866 of these cases if branches are very cheap. */
5867 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5868 return 0;
5870 /* See what we need to return. We can only return a 1, -1, or the
5871 sign bit. */
5873 if (normalizep == 0)
5875 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5876 normalizep = STORE_FLAG_VALUE;
5878 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5880 else
5881 return 0;
5884 last = get_last_insn ();
5886 /* If optimizing, use different pseudo registers for each insn, instead
5887 of reusing the same pseudo. This leads to better CSE, but slows
5888 down the compiler, since there are more pseudos. */
5889 subtarget = (!optimize
5890 && (target_mode == mode)) ? target : NULL_RTX;
5891 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5893 /* For floating-point comparisons, try the reverse comparison or try
5894 changing the "orderedness" of the comparison. */
5895 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5897 enum rtx_code first_code;
5898 bool and_them;
5900 rcode = reverse_condition_maybe_unordered (code);
5901 if (can_compare_p (rcode, mode, ccp_store_flag)
5902 && (code == ORDERED || code == UNORDERED
5903 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5904 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5906 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5907 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5909 /* For the reverse comparison, use either an addition or a XOR. */
5910 if (want_add
5911 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5912 optimize_insn_for_speed_p ()) == 0)
5914 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5915 STORE_FLAG_VALUE, target_mode);
5916 if (tem)
5917 return expand_binop (target_mode, add_optab, tem,
5918 gen_int_mode (normalizep, target_mode),
5919 target, 0, OPTAB_WIDEN);
5921 else if (!want_add
5922 && rtx_cost (trueval, mode, XOR, 1,
5923 optimize_insn_for_speed_p ()) == 0)
5925 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5926 normalizep, target_mode);
5927 if (tem)
5928 return expand_binop (target_mode, xor_optab, tem, trueval,
5929 target, INTVAL (trueval) >= 0,
5930 OPTAB_WIDEN);
5934 delete_insns_since (last);
5936 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5937 if (code == ORDERED || code == UNORDERED)
5938 return 0;
5940 and_them = split_comparison (code, mode, &first_code, &code);
5942 /* If there are no NaNs, the first comparison should always fall through.
5943 Effectively change the comparison to the other one. */
5944 if (!HONOR_NANS (mode))
5946 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5947 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5948 target_mode);
5951 if (!HAVE_conditional_move)
5952 return 0;
5954 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5955 conditional move. */
5956 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5957 normalizep, target_mode);
5958 if (tem == 0)
5959 return 0;
5961 if (and_them)
5962 tem = emit_conditional_move (target, code, op0, op1, mode,
5963 tem, const0_rtx, GET_MODE (tem), 0);
5964 else
5965 tem = emit_conditional_move (target, code, op0, op1, mode,
5966 trueval, tem, GET_MODE (tem), 0);
5968 if (tem == 0)
5969 delete_insns_since (last);
5970 return tem;
5973 /* The remaining tricks only apply to integer comparisons. */
5975 scalar_int_mode int_mode;
5976 if (is_int_mode (mode, &int_mode))
5977 return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
5978 unsignedp, normalizep, trueval);
5980 return 0;
5983 /* Like emit_store_flag, but always succeeds. */
5986 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5987 machine_mode mode, int unsignedp, int normalizep)
5989 rtx tem;
5990 rtx_code_label *label;
5991 rtx trueval, falseval;
5993 /* First see if emit_store_flag can do the job. */
5994 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5995 if (tem != 0)
5996 return tem;
5998 if (!target)
5999 target = gen_reg_rtx (word_mode);
6001 /* If this failed, we have to do this with set/compare/jump/set code.
6002 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
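 /* A sketch of the special case handled just below (illustrative):
        if (op0 == 0) goto label;
        op0 = trueval;
      label:
    which leaves OP0 (== TARGET) unchanged, i.e. zero, when it was zero.  */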
6003 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6004 if (code == NE
6005 && GET_MODE_CLASS (mode) == MODE_INT
6006 && REG_P (target)
6007 && op0 == target
6008 && op1 == const0_rtx)
6010 label = gen_label_rtx ();
6011 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6012 NULL_RTX, NULL, label,
6013 profile_probability::uninitialized ());
6014 emit_move_insn (target, trueval);
6015 emit_label (label);
6016 return target;
6019 if (!REG_P (target)
6020 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6021 target = gen_reg_rtx (GET_MODE (target));
6023 /* Jump in the right direction if the target cannot implement CODE
6024 but can jump on its reverse condition. */
6025 falseval = const0_rtx;
6026 if (! can_compare_p (code, mode, ccp_jump)
6027 && (! FLOAT_MODE_P (mode)
6028 || code == ORDERED || code == UNORDERED
6029 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6030 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6032 enum rtx_code rcode;
6033 if (FLOAT_MODE_P (mode))
6034 rcode = reverse_condition_maybe_unordered (code);
6035 else
6036 rcode = reverse_condition (code);
6038 /* Canonicalize to UNORDERED for the libcall. */
6039 if (can_compare_p (rcode, mode, ccp_jump)
6040 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6042 falseval = trueval;
6043 trueval = const0_rtx;
6044 code = rcode;
6048 emit_move_insn (target, trueval);
6049 label = gen_label_rtx ();
6050 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6051 label, profile_probability::uninitialized ());
6053 emit_move_insn (target, falseval);
6054 emit_label (label);
6056 return target;
6059 /* Perform possibly multi-word comparison and conditional jump to LABEL
6060 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
6061 now a thin wrapper around do_compare_rtx_and_jump. */
6063 static void
6064 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6065 rtx_code_label *label)
6067 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6068 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6069 NULL, label, profile_probability::uninitialized ());