gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2017 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "backend.h"
  26 #include "target.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "predict.h"
  30 #include "memmodel.h"
  31 #include "tm_p.h"
  32 #include "expmed.h"
  33 #include "optabs.h"
  34 #include "emit-rtl.h"
  35 #include "diagnostic-core.h"
  36 #include "fold-const.h"
  37 #include "stor-layout.h"
  38 #include "dojump.h"
  39 #include "explow.h"
  40 #include "expr.h"
  41 #include "langhooks.h"
  42
  43 struct target_expmed default_target_expmed;
  44 #if SWITCHABLE_TARGET
  45 struct target_expmed *this_target_expmed = &default_target_expmed;
  46 #endif
  47
  48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  49                                    unsigned HOST_WIDE_INT,
  50                                    unsigned HOST_WIDE_INT,
  51                                    unsigned HOST_WIDE_INT,
  52                                    rtx, bool);
  53 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
  54                                      unsigned HOST_WIDE_INT,
  55                                      rtx, bool);
  56 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  57                                    unsigned HOST_WIDE_INT,
  58                                    unsigned HOST_WIDE_INT,
  59                                    unsigned HOST_WIDE_INT,
  60                                    rtx, bool);
  61 static rtx extract_fixed_bit_field (machine_mode, rtx,
  62                                     unsigned HOST_WIDE_INT,
  63                                     unsigned HOST_WIDE_INT, rtx, int, bool);
  64 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
  65                                       unsigned HOST_WIDE_INT,
  66                                       unsigned HOST_WIDE_INT, rtx, int, bool);
  67 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
  68 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  69                                     unsigned HOST_WIDE_INT, int, bool);
  70 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
  71 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  72 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  73
  74 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  75    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  76    The mask is truncated if necessary to the width of mode MODE.  The
  77    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
  78
  79 static inline rtx
  80 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
  81 {
  82   return immed_wide_int_const
  83     (wi::shifted_mask (bitpos, bitsize, complement,
  84                        GET_MODE_PRECISION (mode)), mode);
  85 }
  86
  /* Test whether a value is zero or a power of two.  X appears twice in
     the expansion, so it must not have side effects.  */
  #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
    ((((x) - HOST_WIDE_INT_1U) & (x)) == 0)
  90
  91 struct init_expmed_rtl
  92 {
  93   rtx reg;
  94   rtx plus;
  95   rtx neg;
  96   rtx mult;
  97   rtx sdiv;
  98   rtx udiv;
  99   rtx sdiv_32;
 100   rtx smod_32;
 101   rtx wide_mult;
 102   rtx wide_lshr;
 103   rtx wide_trunc;
 104   rtx shift;
 105   rtx shift_mult;
 106   rtx shift_add;
 107   rtx shift_sub0;
 108   rtx shift_sub1;
 109   rtx zext;
 110   rtx trunc;
 111
 112   rtx pow2[MAX_BITS_PER_WORD];
 113   rtx cint[MAX_BITS_PER_WORD];
 114 };
 115
 116 static void
 117 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
 118                       machine_mode from_mode, bool speed)
 119 {
 120   int to_size, from_size;
 121   rtx which;
 122
 123   to_size = GET_MODE_PRECISION (to_mode);
 124   from_size = GET_MODE_PRECISION (from_mode);
 125
 126   /* Most partial integers have a precision less than the "full"
 127      integer it requires for storage.  In case one doesn't, for
 128      comparison purposes here, reduce the bit size by one in that
 129      case.  */
 130   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 131       && pow2p_hwi (to_size))
 132     to_size --;
 133   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 134       && pow2p_hwi (from_size))
 135     from_size --;
 136
 137   /* Assume cost of zero-extend and sign-extend is the same.  */
 138   which = (to_size < from_size ? all->trunc : all->zext);
 139
 140   PUT_MODE (all->reg, from_mode);
 141   set_convert_cost (to_mode, from_mode, speed,
 142                     set_src_cost (which, to_mode, speed));
 143 }
 144
 145 static void
 146 init_expmed_one_mode (struct init_expmed_rtl *all,
 147                       machine_mode mode, int speed)
 148 {
 149   int m, n, mode_bitsize;
 150   machine_mode mode_from;
 151
 152   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 153
 154   PUT_MODE (all->reg, mode);
 155   PUT_MODE (all->plus, mode);
 156   PUT_MODE (all->neg, mode);
 157   PUT_MODE (all->mult, mode);
 158   PUT_MODE (all->sdiv, mode);
 159   PUT_MODE (all->udiv, mode);
 160   PUT_MODE (all->sdiv_32, mode);
 161   PUT_MODE (all->smod_32, mode);
 162   PUT_MODE (all->wide_trunc, mode);
 163   PUT_MODE (all->shift, mode);
 164   PUT_MODE (all->shift_mult, mode);
 165   PUT_MODE (all->shift_add, mode);
 166   PUT_MODE (all->shift_sub0, mode);
 167   PUT_MODE (all->shift_sub1, mode);
 168   PUT_MODE (all->zext, mode);
 169   PUT_MODE (all->trunc, mode);
 170
 171   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
 172   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
 173   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
 174   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
 175   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
 176
 177   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
 178                                      <= 2 * add_cost (speed, mode)));
 179   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
 180                                      <= 4 * add_cost (speed, mode)));
 181
 182   set_shift_cost (speed, mode, 0, 0);
 183   {
 184     int cost = add_cost (speed, mode);
 185     set_shiftadd_cost (speed, mode, 0, cost);
 186     set_shiftsub0_cost (speed, mode, 0, cost);
 187     set_shiftsub1_cost (speed, mode, 0, cost);
 188   }
 189
 190   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 191   for (m = 1; m < n; m++)
 192     {
 193       XEXP (all->shift, 1) = all->cint[m];
 194       XEXP (all->shift_mult, 1) = all->pow2[m];
 195
 196       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
 197       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
 198                                                        speed));
 199       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
 200                                                         speed));
 201       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
 202                                                         speed));
 203     }
 204
 205   if (SCALAR_INT_MODE_P (mode))
 206     {
 207       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 208            mode_from = (machine_mode)(mode_from + 1))
 209         init_expmed_one_conv (all, mode, mode_from, speed);
 210
 211       machine_mode wider_mode;
 212       if (GET_MODE_CLASS (mode) == MODE_INT
 213           && GET_MODE_WIDER_MODE (mode).exists (&wider_mode))
 214         {
 215           PUT_MODE (all->zext, wider_mode);
 216           PUT_MODE (all->wide_mult, wider_mode);
 217           PUT_MODE (all->wide_lshr, wider_mode);
 218           XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
 219
 220           set_mul_widen_cost (speed, wider_mode,
 221                               set_src_cost (all->wide_mult, wider_mode, speed));
 222           set_mul_highpart_cost (speed, mode,
 223                                  set_src_cost (all->wide_trunc, mode, speed));
 224         }
 225     }
 226 }
 227
 228 void
 229 init_expmed (void)
 230 {
 231   struct init_expmed_rtl all;
 232   machine_mode mode = QImode;
 233   int m, speed;
 234
 235   memset (&all, 0, sizeof all);
 236   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 237     {
 238       all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
 239       all.cint[m] = GEN_INT (m);
 240     }
 241
 242   /* Avoid using hard regs in ways which may be unsupported.  */
 243   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 244   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 245   all.neg = gen_rtx_NEG (mode, all.reg);
 246   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 247   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 248   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 249   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 250   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 251   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 252   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 253   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 254   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 255   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 256   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 257   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 258   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 259   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 260   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 261
 262   for (speed = 0; speed < 2; speed++)
 263     {
 264       crtl->maybe_hot_insn_p = speed;
 265       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
 266
 267       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 268            mode = (machine_mode)(mode + 1))
 269         init_expmed_one_mode (&all, mode, speed);
 270
 271       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 272         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 273              mode = (machine_mode)(mode + 1))
 274           init_expmed_one_mode (&all, mode, speed);
 275
 276       if (MIN_MODE_VECTOR_INT != VOIDmode)
 277         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 278              mode = (machine_mode)(mode + 1))
 279           init_expmed_one_mode (&all, mode, speed);
 280     }
 281
 282   if (alg_hash_used_p ())
 283     {
 284       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 285       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 286     }
 287   else
 288     set_alg_hash_used_p (true);
 289   default_rtl_profile ();
 290
 291   ggc_free (all.trunc);
 292   ggc_free (all.shift_sub1);
 293   ggc_free (all.shift_sub0);
 294   ggc_free (all.shift_add);
 295   ggc_free (all.shift_mult);
 296   ggc_free (all.shift);
 297   ggc_free (all.wide_trunc);
 298   ggc_free (all.wide_lshr);
 299   ggc_free (all.wide_mult);
 300   ggc_free (all.zext);
 301   ggc_free (all.smod_32);
 302   ggc_free (all.sdiv_32);
 303   ggc_free (all.udiv);
 304   ggc_free (all.sdiv);
 305   ggc_free (all.mult);
 306   ggc_free (all.neg);
 307   ggc_free (all.plus);
 308   ggc_free (all.reg);
 309 }
 310
 311 /* Return an rtx representing minus the value of X.
 312    MODE is the intended mode of the result,
 313    useful if X is a CONST_INT.  */
 314
 315 rtx
 316 negate_rtx (machine_mode mode, rtx x)
 317 {
 318   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 319
 320   if (result == 0)
 321     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 322
 323   return result;
 324 }
 325
 326 /* Whether reverse storage order is supported on the target.  */
 327 static int reverse_storage_order_supported = -1;
 328
 329 /* Check whether reverse storage order is supported on the target.  */
 330
 331 static void
 332 check_reverse_storage_order_support (void)
 333 {
 334   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 335     {
 336       reverse_storage_order_supported = 0;
 337       sorry ("reverse scalar storage order");
 338     }
 339   else
 340     reverse_storage_order_supported = 1;
 341 }
 342
 343 /* Whether reverse FP storage order is supported on the target.  */
 344 static int reverse_float_storage_order_supported = -1;
 345
 346 /* Check whether reverse FP storage order is supported on the target.  */
 347
 348 static void
 349 check_reverse_float_storage_order_support (void)
 350 {
 351   if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 352     {
 353       reverse_float_storage_order_supported = 0;
 354       sorry ("reverse floating-point scalar storage order");
 355     }
 356   else
 357     reverse_float_storage_order_supported = 1;
 358 }
 359
 360 /* Return an rtx representing value of X with reverse storage order.
 361    MODE is the intended mode of the result,
 362    useful if X is a CONST_INT.  */
 363
 364 rtx
 365 flip_storage_order (machine_mode mode, rtx x)
 366 {
 367   machine_mode int_mode;
 368   rtx result;
 369
 370   if (mode == QImode)
 371     return x;
 372
 373   if (COMPLEX_MODE_P (mode))
 374     {
 375       rtx real = read_complex_part (x, false);
 376       rtx imag = read_complex_part (x, true);
 377
 378       real = flip_storage_order (GET_MODE_INNER (mode), real);
 379       imag = flip_storage_order (GET_MODE_INNER (mode), imag);
 380
 381       return gen_rtx_CONCAT (mode, real, imag);
 382     }
 383
 384   if (__builtin_expect (reverse_storage_order_supported < 0, 0))
 385     check_reverse_storage_order_support ();
 386
 387   if (SCALAR_INT_MODE_P (mode))
 388     int_mode = mode;
 389   else
 390     {
 391       if (FLOAT_MODE_P (mode)
 392           && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
 393         check_reverse_float_storage_order_support ();
 394
 395       int_mode = mode_for_size (GET_MODE_PRECISION (mode), MODE_INT, 0);
 396       if (int_mode == BLKmode)
 397         {
 398           sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
 399           return x;
 400         }
 401       x = gen_lowpart (int_mode, x);
 402     }
 403
 404   result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
 405   if (result == 0)
 406     result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
 407
 408   if (int_mode != mode)
 409     result = gen_lowpart (mode, result);
 410
 411   return result;
 412 }
 413
 414 /* Adjust bitfield memory MEM so that it points to the first unit of mode
 415    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
 416    If MODE is BLKmode, return a reference to every byte in the bitfield.
 417    Set *NEW_BITNUM to the bit position of the field within the new memory.  */
 418
 419 static rtx
 420 narrow_bit_field_mem (rtx mem, machine_mode mode,
 421                       unsigned HOST_WIDE_INT bitsize,
 422                       unsigned HOST_WIDE_INT bitnum,
 423                       unsigned HOST_WIDE_INT *new_bitnum)
 424 {
 425   if (mode == BLKmode)
 426     {
 427       *new_bitnum = bitnum % BITS_PER_UNIT;
 428       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 429       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 430                             / BITS_PER_UNIT);
 431       return adjust_bitfield_address_size (mem, mode, offset, size);
 432     }
 433   else
 434     {
 435       unsigned int unit = GET_MODE_BITSIZE (mode);
 436       *new_bitnum = bitnum % unit;
 437       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 438       return adjust_bitfield_address (mem, mode, offset);
 439     }
 440 }
 441
 442 /* The caller wants to perform insertion or extraction PATTERN on a
 443    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 444    BITREGION_START and BITREGION_END are as for store_bit_field
 445    and FIELDMODE is the natural mode of the field.
 446
 447    Search for a mode that is compatible with the memory access
 448    restrictions and (where applicable) with a register insertion or
 449    extraction.  Return the new memory on success, storing the adjusted
 450    bit position in *NEW_BITNUM.  Return null otherwise.  */
 451
 452 static rtx
 453 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 454                               rtx op0, HOST_WIDE_INT bitsize,
 455                               HOST_WIDE_INT bitnum,
 456                               unsigned HOST_WIDE_INT bitregion_start,
 457                               unsigned HOST_WIDE_INT bitregion_end,
 458                               machine_mode fieldmode,
 459                               unsigned HOST_WIDE_INT *new_bitnum)
 460 {
 461   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 462                                 bitregion_end, MEM_ALIGN (op0),
 463                                 MEM_VOLATILE_P (op0));
 464   machine_mode best_mode;
 465   if (iter.next_mode (&best_mode))
 466     {
 467       /* We can use a memory in BEST_MODE.  See whether this is true for
 468          any wider modes.  All other things being equal, we prefer to
 469          use the widest mode possible because it tends to expose more
 470          CSE opportunities.  */
 471       if (!iter.prefer_smaller_modes ())
 472         {
 473           /* Limit the search to the mode required by the corresponding
 474              register insertion or extraction instruction, if any.  */
 475           machine_mode limit_mode = word_mode;
 476           extraction_insn insn;
 477           if (get_best_reg_extraction_insn (&insn, pattern,
 478                                             GET_MODE_BITSIZE (best_mode),
 479                                             fieldmode))
 480             limit_mode = insn.field_mode;
 481
 482           machine_mode wider_mode;
 483           while (iter.next_mode (&wider_mode)
 484                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 485             best_mode = wider_mode;
 486         }
 487       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 488                                    new_bitnum);
 489     }
 490   return NULL_RTX;
 491 }
 492
 493 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 494    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 495    offset is then BITNUM / BITS_PER_UNIT.  */
 496
 497 static bool
 498 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
 499                      unsigned HOST_WIDE_INT bitsize,
 500                      machine_mode struct_mode)
 501 {
 502   if (BYTES_BIG_ENDIAN)
 503     return (bitnum % BITS_PER_UNIT == 0
 504             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
 505                 || (bitnum + bitsize) % BITS_PER_WORD == 0));
 506   else
 507     return bitnum % BITS_PER_WORD == 0;
 508 }
 509
 510 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 511    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 512    Return false if the access would touch memory outside the range
 513    BITREGION_START to BITREGION_END for conformance to the C++ memory
 514    model.  */
 515
 516 static bool
 517 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 518                             unsigned HOST_WIDE_INT bitnum,
 519                             machine_mode fieldmode,
 520                             unsigned HOST_WIDE_INT bitregion_start,
 521                             unsigned HOST_WIDE_INT bitregion_end)
 522 {
 523   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 524
 525   /* -fstrict-volatile-bitfields must be enabled and we must have a
 526      volatile MEM.  */
 527   if (!MEM_P (op0)
 528       || !MEM_VOLATILE_P (op0)
 529       || flag_strict_volatile_bitfields <= 0)
 530     return false;
 531
 532   /* Non-integral modes likely only happen with packed structures.
 533      Punt.  */
 534   if (!SCALAR_INT_MODE_P (fieldmode))
 535     return false;
 536
 537   /* The bit size must not be larger than the field mode, and
 538      the field mode must not be larger than a word.  */
 539   if (bitsize > modesize || modesize > BITS_PER_WORD)
 540     return false;
 541
 542   /* Check for cases of unaligned fields that must be split.  */
 543   if (bitnum % modesize + bitsize > modesize)
 544     return false;
 545
 546   /* The memory must be sufficiently aligned for a MODESIZE access.
 547      This condition guarantees, that the memory access will not
 548      touch anything after the end of the structure.  */
 549   if (MEM_ALIGN (op0) < modesize)
 550     return false;
 551
 552   /* Check for cases where the C++ memory model applies.  */
 553   if (bitregion_end != 0
 554       && (bitnum - bitnum % modesize < bitregion_start
 555           || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
 556     return false;
 557
 558   return true;
 559 }
 560
 561 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 562    bit number BITNUM can be treated as a simple value of mode MODE.  */
 563
 564 static bool
 565 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 566                        unsigned HOST_WIDE_INT bitnum, machine_mode mode)
 567 {
 568   return (MEM_P (op0)
 569           && bitnum % BITS_PER_UNIT == 0
 570           && bitsize == GET_MODE_BITSIZE (mode)
 571           && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
 572               || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
 573                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 574 }
 575 \f
 576 /* Try to use instruction INSV to store VALUE into a field of OP0.
 577    BITSIZE and BITNUM are as for store_bit_field.  */
 578
 579 static bool
 580 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 581                             unsigned HOST_WIDE_INT bitsize,
 582                             unsigned HOST_WIDE_INT bitnum,
 583                             rtx value)
 584 {
 585   struct expand_operand ops[4];
 586   rtx value1;
 587   rtx xop0 = op0;
 588   rtx_insn *last = get_last_insn ();
 589   bool copy_back = false;
 590
 591   machine_mode op_mode = insv->field_mode;
 592   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 593   if (bitsize == 0 || bitsize > unit)
 594     return false;
 595
 596   if (MEM_P (xop0))
 597     /* Get a reference to the first byte of the field.  */
 598     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 599                                  &bitnum);
 600   else
 601     {
 602       /* Convert from counting within OP0 to counting in OP_MODE.  */
 603       if (BYTES_BIG_ENDIAN)
 604         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 605
 606       /* If xop0 is a register, we need it in OP_MODE
 607          to make it acceptable to the format of insv.  */
 608       if (GET_CODE (xop0) == SUBREG)
 609         /* We can't just change the mode, because this might clobber op0,
 610            and we will need the original value of op0 if insv fails.  */
 611         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 612       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 613         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 614     }
 615
 616   /* If the destination is a paradoxical subreg such that we need a
 617      truncate to the inner mode, perform the insertion on a temporary and
 618      truncate the result to the original destination.  Note that we can't
 619      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 620      X) 0)) is (reg:N X).  */
 621   if (GET_CODE (xop0) == SUBREG
 622       && REG_P (SUBREG_REG (xop0))
 623       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 624                                          op_mode))
 625     {
 626       rtx tem = gen_reg_rtx (op_mode);
 627       emit_move_insn (tem, xop0);
 628       xop0 = tem;
 629       copy_back = true;
 630     }
 631
 632   /* There are similar overflow check at the start of store_bit_field_1,
 633      but that only check the situation where the field lies completely
 634      outside the register, while there do have situation where the field
 635      lies partialy in the register, we need to adjust bitsize for this
 636      partial overflow situation.  Without this fix, pr48335-2.c on big-endian
 637      will broken on those arch support bit insert instruction, like arm, aarch64
 638      etc.  */
 639   if (bitsize + bitnum > unit && bitnum < unit)
 640     {
 641       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
 642                "destination object, data truncated into %wu-bit",
 643                bitsize, unit - bitnum);
 644       bitsize = unit - bitnum;
 645     }
 646
 647   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 648      "backwards" from the size of the unit we are inserting into.
 649      Otherwise, we count bits from the most significant on a
 650      BYTES/BITS_BIG_ENDIAN machine.  */
 651
 652   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 653     bitnum = unit - bitsize - bitnum;
 654
 655   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 656   value1 = value;
 657   if (GET_MODE (value) != op_mode)
 658     {
 659       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 660         {
 661           rtx tmp;
 662           /* Optimization: Don't bother really extending VALUE
 663              if it has all the bits we will actually use.  However,
 664              if we must narrow it, be sure we do it correctly.  */
 665
 666           if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 667             {
 668               tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 669               if (! tmp)
 670                 tmp = simplify_gen_subreg (op_mode,
 671                                            force_reg (GET_MODE (value),
 672                                                       value1),
 673                                            GET_MODE (value), 0);
 674             }
 675           else
 676             {
 677               tmp = gen_lowpart_if_possible (op_mode, value1);
 678               if (! tmp)
 679                 tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value),
 680                                                        value1));
 681             }
 682           value1 = tmp;
 683         }
 684       else if (CONST_INT_P (value))
 685         value1 = gen_int_mode (INTVAL (value), op_mode);
 686       else
 687         /* Parse phase is supposed to make VALUE's data type
 688            match that of the component reference, which is a type
 689            at least as wide as the field; so VALUE should have
 690            a mode that corresponds to that type.  */
 691         gcc_assert (CONSTANT_P (value));
 692     }
 693
 694   create_fixed_operand (&ops[0], xop0);
 695   create_integer_operand (&ops[1], bitsize);
 696   create_integer_operand (&ops[2], bitnum);
 697   create_input_operand (&ops[3], value1, op_mode);
 698   if (maybe_expand_insn (insv->icode, 4, ops))
 699     {
 700       if (copy_back)
 701         convert_move (op0, xop0, true);
 702       return true;
 703     }
 704   delete_insns_since (last);
 705   return false;
 706 }
 707
 708 /* A subroutine of store_bit_field, with the same arguments.  Return true
 709    if the operation could be implemented.
 710
 711    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 712    no other way of implementing the operation.  If FALLBACK_P is false,
 713    return false instead.  */
 714
 715 static bool
 716 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 717                    unsigned HOST_WIDE_INT bitnum,
 718                    unsigned HOST_WIDE_INT bitregion_start,
 719                    unsigned HOST_WIDE_INT bitregion_end,
 720                    machine_mode fieldmode,
 721                    rtx value, bool reverse, bool fallback_p)
 722 {
 723   rtx op0 = str_rtx;
 724   rtx orig_value;
 725
 726   while (GET_CODE (op0) == SUBREG)
 727     {
 728       /* The following line once was done only if WORDS_BIG_ENDIAN,
 729          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 730          meaningful at a much higher level; when structures are copied
 731          between memory and regs, the higher-numbered regs
 732          always get higher addresses.  */
 733       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 734       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 735       int byte_offset = 0;
 736
 737       /* Paradoxical subregs need special handling on big-endian machines.  */
 738       if (paradoxical_subreg_p (op0))
 739         {
 740           int difference = inner_mode_size - outer_mode_size;
 741
 742           if (WORDS_BIG_ENDIAN)
 743             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 744           if (BYTES_BIG_ENDIAN)
 745             byte_offset += difference % UNITS_PER_WORD;
 746         }
 747       else
 748         byte_offset = SUBREG_BYTE (op0);
 749
 750       bitnum += byte_offset * BITS_PER_UNIT;
 751       op0 = SUBREG_REG (op0);
 752     }
 753
 754   /* No action is needed if the target is a register and if the field
 755      lies completely outside that register.  This can occur if the source
 756      code contains an out-of-bounds access to a small array.  */
 757   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 758     return true;
 759
 760   /* Use vec_set patterns for inserting parts of vectors whenever
 761      available.  */
 762   if (VECTOR_MODE_P (GET_MODE (op0))
 763       && !MEM_P (op0)
 764       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 765       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 766       && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
 767       && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
 768     {
 769       struct expand_operand ops[3];
 770       machine_mode outermode = GET_MODE (op0);
 771       machine_mode innermode = GET_MODE_INNER (outermode);
 772       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 773       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 774
 775       create_fixed_operand (&ops[0], op0);
 776       create_input_operand (&ops[1], value, innermode);
 777       create_integer_operand (&ops[2], pos);
 778       if (maybe_expand_insn (icode, 3, ops))
 779         return true;
 780     }
 781
 782   /* If the target is a register, overwriting the entire object, or storing
 783      a full-word or multi-word field can be done with just a SUBREG.  */
 784   if (!MEM_P (op0)
 785       && bitsize == GET_MODE_BITSIZE (fieldmode)
 786       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 787           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 788     {
 789       /* Use the subreg machinery either to narrow OP0 to the required
 790          words or to cope with mode punning between equal-sized modes.
 791          In the latter case, use subreg on the rhs side, not lhs.  */
 792       rtx sub;
 793
 794       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 795         {
 796           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 797           if (sub)
 798             {
 799               if (reverse)
 800                 sub = flip_storage_order (GET_MODE (op0), sub);
 801               emit_move_insn (op0, sub);
 802               return true;
 803             }
 804         }
 805       else
 806         {
 807           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 808                                      bitnum / BITS_PER_UNIT);
 809           if (sub)
 810             {
 811               if (reverse)
 812                 value = flip_storage_order (fieldmode, value);
 813               emit_move_insn (sub, value);
 814               return true;
 815             }
 816         }
 817     }
 818
 819   /* If the target is memory, storing any naturally aligned field can be
 820      done with a simple store.  For targets that support fast unaligned
 821      memory, any naturally sized, unit aligned field can be done directly.  */
 822   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
 823     {
 824       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 825       if (reverse)
 826         value = flip_storage_order (fieldmode, value);
 827       emit_move_insn (op0, value);
 828       return true;
 829     }
 830
 831   /* Make sure we are playing with integral modes.  Pun with subregs
 832      if we aren't.  This must come after the entire register case above,
 833      since that case is valid for any mode.  The following cases are only
 834      valid for integral modes.  */
 835   {
 836     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 837     if (imode != GET_MODE (op0))
 838       {
 839         if (MEM_P (op0))
 840           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
 841         else
 842           {
 843             gcc_assert (imode != BLKmode);
 844             op0 = gen_lowpart (imode, op0);
 845           }
 846       }
 847   }
 848
 849   /* Storing an lsb-aligned field in a register
 850      can be done with a movstrict instruction.  */
 851
 852   if (!MEM_P (op0)
 853       && !reverse
 854       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 855       && bitsize == GET_MODE_BITSIZE (fieldmode)
 856       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 857     {
 858       struct expand_operand ops[2];
 859       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 860       rtx arg0 = op0;
 861       unsigned HOST_WIDE_INT subreg_off;
 862
 863       if (GET_CODE (arg0) == SUBREG)
 864         {
 865           /* Else we've got some float mode source being extracted into
 866              a different float mode destination -- this combination of
 867              subregs results in Severe Tire Damage.  */
 868           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 869                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 870                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 871           arg0 = SUBREG_REG (arg0);
 872         }
 873
 874       subreg_off = bitnum / BITS_PER_UNIT;
 875       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 876         {
 877           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 878
 879           create_fixed_operand (&ops[0], arg0);
 880           /* Shrink the source operand to FIELDMODE.  */
 881           create_convert_operand_to (&ops[1], value, fieldmode, false);
 882           if (maybe_expand_insn (icode, 2, ops))
 883             return true;
 884         }
 885     }
 886
 887   /* Handle fields bigger than a word.  */
 888
 889   if (bitsize > BITS_PER_WORD)
 890     {
 891       /* Here we transfer the words of the field
 892          in the order least significant first.
 893          This is because the most significant word is the one which may
 894          be less than full.
 895          However, only do that if the value is not BLKmode.  */
 896
 897       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 898       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 899       unsigned int i;
 900       rtx_insn *last;
 901
 902       /* This is the mode we must force value to, so that there will be enough
 903          subwords to extract.  Note that fieldmode will often (always?) be
 904          VOIDmode, because that is what store_field uses to indicate that this
 905          is a bit field, but passing VOIDmode to operand_subword_force
 906          is not allowed.  */
 907       fieldmode = GET_MODE (value);
 908       if (fieldmode == VOIDmode)
 909         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 910
 911       last = get_last_insn ();
 912       for (i = 0; i < nwords; i++)
 913         {
 914           /* If I is 0, use the low-order word in both field and target;
 915              if I is 1, use the next to lowest word; and so on.  */
 916           unsigned int wordnum = (backwards
 917                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 918                                   - i - 1
 919                                   : i);
 920           unsigned int bit_offset = (backwards ^ reverse
 921                                      ? MAX ((int) bitsize - ((int) i + 1)
 922                                             * BITS_PER_WORD,
 923                                             0)
 924                                      : (int) i * BITS_PER_WORD);
 925           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 926           unsigned HOST_WIDE_INT new_bitsize =
 927             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 928
 929           /* If the remaining chunk doesn't have full wordsize we have
 930              to make sure that for big-endian machines the higher order
 931              bits are used.  */
 932           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 933             value_word = simplify_expand_binop (word_mode, lshr_optab,
 934                                                 value_word,
 935                                                 GEN_INT (BITS_PER_WORD
 936                                                          - new_bitsize),
 937                                                 NULL_RTX, true,
 938                                                 OPTAB_LIB_WIDEN);
 939
 940           if (!store_bit_field_1 (op0, new_bitsize,
 941                                   bitnum + bit_offset,
 942                                   bitregion_start, bitregion_end,
 943                                   word_mode,
 944                                   value_word, reverse, fallback_p))
 945             {
 946               delete_insns_since (last);
 947               return false;
 948             }
 949         }
 950       return true;
 951     }
 952
 953   /* If VALUE has a floating-point or complex mode, access it as an
 954      integer of the corresponding size.  This can occur on a machine
 955      with 64 bit registers that uses SFmode for float.  It can also
 956      occur for unaligned float or complex fields.  */
 957   orig_value = value;
 958   if (GET_MODE (value) != VOIDmode
 959       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 960       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 961     {
 962       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 963       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 964     }
 965
 966   /* If OP0 is a multi-word register, narrow it to the affected word.
 967      If the region spans two words, defer to store_split_bit_field.
 968      Don't do this if op0 is a single hard register wider than word
 969      such as a float or vector register.  */
 970   if (!MEM_P (op0)
 971       && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD
 972       && (!REG_P (op0)
 973           || !HARD_REGISTER_P (op0)
 974           || HARD_REGNO_NREGS (REGNO (op0), GET_MODE (op0)) != 1))
 975     {
 976       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
 977         {
 978           if (!fallback_p)
 979             return false;
 980
 981           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 982                                  bitregion_end, value, reverse);
 983           return true;
 984         }
 985       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 986                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 987       gcc_assert (op0);
 988       bitnum %= BITS_PER_WORD;
 989     }
 990
 991   /* From here on we can assume that the field to be stored in fits
 992      within a word.  If the destination is a register, it too fits
 993      in a word.  */
 994
 995   extraction_insn insv;
 996   if (!MEM_P (op0)
 997       && !reverse
 998       && get_best_reg_extraction_insn (&insv, EP_insv,
 999                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1000                                        fieldmode)
1001       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1002     return true;
1003
1004   /* If OP0 is a memory, try copying it to a register and seeing if a
1005      cheap register alternative is available.  */
1006   if (MEM_P (op0) && !reverse)
1007     {
1008       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1009                                         fieldmode)
1010           && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1011         return true;
1012
1013       rtx_insn *last = get_last_insn ();
1014
1015       /* Try loading part of OP0 into a register, inserting the bitfield
1016          into that, and then copying the result back to OP0.  */
1017       unsigned HOST_WIDE_INT bitpos;
1018       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1019                                                bitregion_start, bitregion_end,
1020                                                fieldmode, &bitpos);
1021       if (xop0)
1022         {
1023           rtx tempreg = copy_to_reg (xop0);
1024           if (store_bit_field_1 (tempreg, bitsize, bitpos,
1025                                  bitregion_start, bitregion_end,
1026                                  fieldmode, orig_value, reverse, false))
1027             {
1028               emit_move_insn (xop0, tempreg);
1029               return true;
1030             }
1031           delete_insns_since (last);
1032         }
1033     }
1034
1035   if (!fallback_p)
1036     return false;
1037
1038   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
1039                          bitregion_end, value, reverse);
1040   return true;
1041 }
1042
1043 /* Generate code to store value from rtx VALUE
1044    into a bit-field within structure STR_RTX
1045    containing BITSIZE bits starting at bit BITNUM.
1046
1047    BITREGION_START is bitpos of the first bitfield in this region.
1048    BITREGION_END is the bitpos of the ending bitfield in this region.
1049    These two fields are 0, if the C++ memory model does not apply,
1050    or we are not interested in keeping track of bitfield regions.
1051
1052    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1053
1054    If REVERSE is true, the store is to be done in reverse order.

        Nothing is returned.  Three paths are taken in turn:
        - if strict_volatile_bitfield_p accepts the access, the store is
          done in FIELDMODE, either as a plain move or as a
          read-modify-write of one FIELDMODE-sized unit, and we return;
        - otherwise, if STR_RTX is a MEM and BITREGION_START is nonzero,
          the address and the bit positions are rebased to the start of
          the bit region;
        - finally store_bit_field_1 is called with FALLBACK_P true, and a
          false result is treated as unreachable.  */
1055
1056 void
1057 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1058                  unsigned HOST_WIDE_INT bitnum,
1059                  unsigned HOST_WIDE_INT bitregion_start,
1060                  unsigned HOST_WIDE_INT bitregion_end,
1061                  machine_mode fieldmode,
1062                  rtx value, bool reverse)
1063 {
1064   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1065   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
1066                                   bitregion_start, bitregion_end))
1067     {
1068       /* Storing of a full word can be done with a simple store.
1069          We know here that the field can be accessed with one single
1070          instruction.  For targets that support unaligned memory,
1071          an unaligned access may be necessary.  */
1072       if (bitsize == GET_MODE_BITSIZE (fieldmode))
1073         {
1074           str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
1075                                              bitnum / BITS_PER_UNIT);
1076           if (reverse)
1077             value = flip_storage_order (fieldmode, value);
               /* The byte offset computed above is only exact if the field
                  starts on a byte boundary.  */
1078           gcc_assert (bitnum % BITS_PER_UNIT == 0);
1079           emit_move_insn (str_rtx, value);
1080         }
1081       else
1082         {
1083           rtx temp;
1084
               /* The field is narrower than FIELDMODE.  Narrow the memory
                  reference to FIELDMODE (BITNUM is updated to match), copy
                  it into a register, insert the field into that register,
                  and write the register back with one move.  The bit
                  region arguments are passed as 0 for the register
                  store.  */
1085           str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
1086                                           &bitnum);
1087           gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
1088           temp = copy_to_reg (str_rtx);
1089           if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1090                                   fieldmode, value, reverse, true))
1091             gcc_unreachable ();
1092
1093           emit_move_insn (str_rtx, temp);
1094         }
1095
1096       return;
1097     }
1098
1099   /* Under the C++0x memory model, we must not touch bits outside the
1100      bit region.  Adjust the address to start at the beginning of the
1101      bit region.  */
1102   if (MEM_P (str_rtx) && bitregion_start > 0)
1103     {
1104       machine_mode bestmode;
1105       HOST_WIDE_INT offset, size;
1106
1107       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1108
           /* OFFSET is the region start in bytes.  BITNUM and
              BITREGION_END become relative to that start, and SIZE is the
              number of bytes from the region start through the last bit
              of the field, rounded up.  */
1109       offset = bitregion_start / BITS_PER_UNIT;
1110       bitnum -= bitregion_start;
1111       size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1112       bitregion_end -= bitregion_start;
1113       bitregion_start = 0;
1114       bestmode = get_best_mode (bitsize, bitnum,
1115                                 bitregion_start, bitregion_end,
1116                                 MEM_ALIGN (str_rtx), VOIDmode,
1117                                 MEM_VOLATILE_P (str_rtx));
1118       str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1119     }
1120
       /* FALLBACK_P is true, so store_bit_field_1 is expected to always
          find some way of doing the store.  */
1121   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1122                           bitregion_start, bitregion_end,
1123                           fieldmode, value, reverse, true))
1124     gcc_unreachable ();
1125 }
1126 \f
1127 /* Use shifts and boolean operations to store VALUE into a bit field of
1128    width BITSIZE in OP0, starting at bit BITNUM.
1129
1130    If REVERSE is true, the store is to be done in reverse order.

        BITREGION_START and BITREGION_END are only used when OP0 is a MEM:
        they are handed to get_best_mode when choosing the access mode and
        to store_split_bit_field when no single mode is found.  When OP0 is
        not a MEM the store is done directly in OP0's own mode by
        store_fixed_bit_field_1.  */
1131
1132 static void
1133 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1134                        unsigned HOST_WIDE_INT bitnum,
1135                        unsigned HOST_WIDE_INT bitregion_start,
1136                        unsigned HOST_WIDE_INT bitregion_end,
1137                        rtx value, bool reverse)
1138 {
1139   /* There is a case not handled here:
1140      a structure with a known alignment of just a halfword
1141      and a field split across two aligned halfwords within the structure.
1142      Or likewise a structure with a known alignment of just a byte
1143      and a field split across two bytes.
1144      Such cases are not supposed to be able to occur.  */
1145
1146   if (MEM_P (op0))
1147     {
           /* Start from OP0's own mode, but use word_mode instead when
              that mode has a bit size of 0 or is wider than word_mode.
              The result is the largest mode passed to get_best_mode.  */
1148       machine_mode mode = GET_MODE (op0);
1149       if (GET_MODE_BITSIZE (mode) == 0
1150           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1151         mode = word_mode;
1152       mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1153                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1154
1155       if (mode == VOIDmode)
1156         {
1157           /* The only way this should occur is if the field spans word
1158              boundaries.  */
1159           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1160                                  bitregion_end, value, reverse);
1161           return;
1162         }
1163
           /* Re-address OP0 in MODE; BITNUM is rewritten to be relative
              to the narrowed reference.  */
1164       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1165     }
1166
1167   store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
1168 }
1169
1170 /* Helper function for store_fixed_bit_field, stores
1171    the bit field always using the MODE of OP0.

        OP0 must have a scalar integer mode (this is asserted).  BITSIZE,
        BITNUM, VALUE and REVERSE are as for store_fixed_bit_field.

        The store is emitted as: shift VALUE into position, clear the
        field's bits in a register copy of OP0 with an AND mask, IOR the
        shifted value in, and move the result back to OP0.  The AND is
        omitted when VALUE is a constant whose low BITSIZE bits are all
        set, and the IOR is omitted when VALUE is a constant whose low
        BITSIZE bits are all clear.  */
1172
1173 static void
1174 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1175                          unsigned HOST_WIDE_INT bitnum,
1176                          rtx value, bool reverse)
1177 {
1178   machine_mode mode;
1179   rtx temp;
       /* Set only for constant VALUEs; see the CONST_INT_P branch.  */
1180   int all_zero = 0;
1181   int all_one = 0;
1182
1183   mode = GET_MODE (op0);
1184   gcc_assert (SCALAR_INT_MODE_P (mode));
1185
1186   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1187      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1188
1189   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1190     /* BITNUM is the distance between our msb
1191        and that of the containing datum.
1192        Convert it to the distance from the lsb.  */
1193     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1194
1195   /* Now BITNUM is always the distance between our lsb
1196      and that of OP0.  */
1197
1198   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1199      we must first convert its mode to MODE.  */
1200
1201   if (CONST_INT_P (value))
1202     {
1203       unsigned HOST_WIDE_INT v = UINTVAL (value);
1204
           /* Only the low BITSIZE bits of the constant are stored.  The
              mask is skipped when BITSIZE is not smaller than
              HOST_BITS_PER_WIDE_INT, where the shift below would not be
              valid.  */
1205       if (bitsize < HOST_BITS_PER_WIDE_INT)
1206         v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1207
           /* Record whether the stored bits are all clear or all set so
              that the IOR or the AND further down can be omitted.  */
1208       if (v == 0)
1209         all_zero = 1;
1210       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1211                 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1212                || (bitsize == HOST_BITS_PER_WIDE_INT
1213                    && v == HOST_WIDE_INT_M1U))
1214         all_one = 1;
1215
1216       value = lshift_value (mode, v, bitnum);
1217     }
1218   else
1219     {
           /* An explicit AND with a BITSIZE-wide mask is requested only
              when VALUE's mode is not exactly BITSIZE bits wide and the
              field does not end at the most significant bit of MODE.
              This is decided from VALUE's mode before it is converted to
              MODE below.  */
1220       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1221                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1222
1223       if (GET_MODE (value) != mode)
1224         value = convert_to_mode (mode, value, 1);
1225
1226       if (must_and)
1227         value = expand_binop (mode, and_optab, value,
1228                               mask_rtx (mode, 0, bitsize, 0),
1229                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1230       if (bitnum > 0)
1231         value = expand_shift (LSHIFT_EXPR, mode, value,
1232                               bitnum, NULL_RTX, 1);
1233     }
1234
1235   if (reverse)
1236     value = flip_storage_order (mode, value);
1237
1238   /* Now clear the chosen bits in OP0,
1239      except that if VALUE is -1 we need not bother.  */
1240   /* We keep the intermediates in registers to allow CSE to combine
1241      consecutive bitfield assignments.  */
1242
1243   temp = force_reg (mode, op0);
1244
1245   if (! all_one)
1246     {
           /* The mask gets the same storage-order flip as VALUE so that
              both refer to the same bits of OP0.  */
1247       rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1248       if (reverse)
1249         mask = flip_storage_order (mode, mask);
1250       temp = expand_binop (mode, and_optab, temp, mask,
1251                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1252       temp = force_reg (mode, temp);
1253     }
1254
1255   /* Now logical-or VALUE into OP0, unless it is zero.  */
1256
1257   if (! all_zero)
1258     {
1259       temp = expand_binop (mode, ior_optab, temp, value,
1260                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1261       temp = force_reg (mode, temp);
1262     }
1263
       /* Write the result back unless TEMP is OP0 itself.  The move
          targets a copy_rtx of OP0 rather than the original rtx.  */
1264   if (op0 != temp)
1265     {
1266       op0 = copy_rtx (op0);
1267       emit_move_insn (op0, temp);
1268     }
1269 }
1270 \f
1271 /* Store a bit field that is split across multiple accessible memory objects.
1272
1273    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1274    BITSIZE is the field width; BITPOS the position of its first bit
1275    (within the word).
1276    VALUE is the value to store.
1277
1278    If REVERSE is true, the store is to be done in reverse order.
1279
1280    This does not yet handle fields wider than BITS_PER_WORD.

        BITREGION_START and BITREGION_END are passed on unchanged to
        store_fixed_bit_field for every piece.  A nonzero BITREGION_END
        additionally makes the loop shrink the access unit near the end of
        the region when OP0 is neither a REG nor a SUBREG of a REG.

        The field is stored piece by piece; each piece lies within a single
        unit of at most BITS_PER_WORD bits and is written with
        store_fixed_bit_field.  */
1281
1282 static void
1283 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1284                        unsigned HOST_WIDE_INT bitpos,
1285                        unsigned HOST_WIDE_INT bitregion_start,
1286                        unsigned HOST_WIDE_INT bitregion_end,
1287                        rtx value, bool reverse)
1288 {
       /* UNIT is the width in bits of the pieces OP0 is accessed in,
          TOTAL_BITS the width of VALUE's mode, and BITSDONE the number of
          bits of the field stored so far.  */
1289   unsigned int unit, total_bits, bitsdone = 0;
1290
1291   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1292      much at a time.  */
1293   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1294     unit = BITS_PER_WORD;
1295   else
1296     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1297
1298   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1299      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1300      again, and we will mutually recurse forever.  */
1301   if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1302     unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1303
1304   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1305      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1306      that VALUE might be a floating-point constant.  */
1307   if (CONSTANT_P (value) && !CONST_INT_P (value))
1308     {
1309       rtx word = gen_lowpart_common (word_mode, value);
1310
1311       if (word && (value != word))
1312         value = word;
1313       else
1314         value = gen_lowpart_common (word_mode,
1315                                     force_reg (GET_MODE (value) != VOIDmode
1316                                                ? GET_MODE (value)
1317                                                : word_mode, value));
1318     }
1319
1320   total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1321
1322   while (bitsdone < bitsize)
1323     {
           /* THISSIZE is the number of bits stored by this iteration,
              THISPOS the bit position of the piece within its unit, and
              OFFSET the index of that unit counted in UNITs.  */
1324       unsigned HOST_WIDE_INT thissize;
1325       unsigned HOST_WIDE_INT thispos;
1326       unsigned HOST_WIDE_INT offset;
1327       rtx part, word;
1328
1329       offset = (bitpos + bitsdone) / unit;
1330       thispos = (bitpos + bitsdone) % unit;
1331
1332       /* When region of bytes we can touch is restricted, decrease
1333          UNIT close to the end of the region as needed.  If op0 is a REG
1334          or SUBREG of REG, don't do this, as there can't be data races
1335          on a register and we can expand shorter code in some cases.  */
1336       if (bitregion_end
1337           && unit > BITS_PER_UNIT
1338           && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1339           && !REG_P (op0)
1340           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1341         {
               /* Retry this iteration with a unit half as wide; BITSDONE
                  is not advanced.  UNIT is never reduced below
                  BITS_PER_UNIT because of the test above.  */
1342           unit = unit / 2;
1343           continue;
1344         }
1345
1346       /* THISSIZE must not overrun a word boundary.  Otherwise,
1347          store_fixed_bit_field will call us again, and we will mutually
1348          recurse forever.  */
1349       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1350       thissize = MIN (thissize, unit - thispos);
1351
1352       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1353         {
1354           /* Fetch successively less significant portions.  */
1355           if (CONST_INT_P (value))
1356             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1357                              >> (bitsize - bitsdone - thissize))
1358                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1359           /* Likewise, but the source is little-endian.  */
1360           else if (reverse)
1361             part = extract_fixed_bit_field (word_mode, value, thissize,
1362                                             bitsize - bitsdone - thissize,
1363                                             NULL_RTX, 1, false);
1364           else
1365             {
                   /* NOTE(review): this declaration shadows the
                      function-level TOTAL_BITS, which was computed from
                      the same expression before the loop; VALUE is not
                      modified inside the loop.  */
1366               int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1367               /* The args are chosen so that the last part includes the
1368                  lsb.  Give extract_bit_field the value it needs (with
1369                  endianness compensation) to fetch the piece we want.  */
1370               part = extract_fixed_bit_field (word_mode, value, thissize,
1371                                               total_bits - bitsize + bitsdone,
1372                                               NULL_RTX, 1, false);
1373             }
1374         }
1375       else
1376         {
1377           /* Fetch successively more significant portions.  */
1378           if (CONST_INT_P (value))
1379             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1380                              >> bitsdone)
1381                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1382           /* Likewise, but the source is big-endian.  */
1383           else if (reverse)
1384             part = extract_fixed_bit_field (word_mode, value, thissize,
1385                                             total_bits - bitsdone - thissize,
1386                                             NULL_RTX, 1, false);
1387           else
1388             part = extract_fixed_bit_field (word_mode, value, thissize,
1389                                             bitsdone, NULL_RTX, 1, false);
1390         }
1391
1392       /* If OP0 is a register, then handle OFFSET here.  */
1393       if (SUBREG_P (op0) || REG_P (op0))
1394         {
1395           machine_mode op0_mode = GET_MODE (op0);
               /* A register narrower than a word has only one unit: a
                  nonzero OFFSET is marked with const0_rtx and skipped
                  below.  Otherwise pick the word of OP0 that contains
                  the unit.  */
1396           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1397             word = offset ? const0_rtx : op0;
1398           else
1399             word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1400                                           GET_MODE (op0));
               /* Keep only the unit index within the selected word
                  (presumably relies on BITS_PER_WORD / UNIT being a
                  power of two -- TODO confirm).  */
1401           offset &= BITS_PER_WORD / unit - 1;
1402         }
1403       else
1404         word = op0;
1405
1406       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1407          it is just an out-of-bounds access.  Ignore it.  */
1408       if (word != const0_rtx)
1409         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1410                                bitregion_start, bitregion_end, part,
1411                                reverse);
1412       bitsdone += thissize;
1413     }
1414 }
1415 \f
1416 /* A subroutine of extract_bit_field_1 that converts return value X
1417    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1418    to extract_bit_field.

        X is returned unchanged if it already has mode MODE or TMODE;
        otherwise the result has mode TMODE.  */
1419
1420 static rtx
1421 convert_extracted_bit_field (rtx x, machine_mode mode,
1422                              machine_mode tmode, bool unsignedp)
1423 {
1424   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1425     return x;
1426
1427   /* If TMODE is not a scalar integral mode, first convert X to the
1428      integer mode of TMODE's size and then access it in TMODE
1429      via gen_lowpart.  */
1430   if (!SCALAR_INT_MODE_P (tmode))
1431     {
1432       machine_mode smode;
1433
1434       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1435       x = convert_to_mode (smode, x, unsignedp);
1436       x = force_reg (smode, x);
1437       return gen_lowpart (tmode, x);
1438     }
1439
1440   return convert_to_mode (tmode, x, unsignedp);
1441 }
1442
1443 /* Try to use an ext(z)v pattern to extract a field from OP0.
1444    Return the extracted value on success, otherwise return null.
1445    EXTV describes the extraction instruction to use; its field_mode
1446    is the mode of the extraction (EXT_MODE below) and its struct_mode
        is used to narrow a memory OP0.  The other arguments
        are as for extract_bit_field.  */
1447
1448 static rtx
1449 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1450                               unsigned HOST_WIDE_INT bitsize,
1451                               unsigned HOST_WIDE_INT bitnum,
1452                               int unsignedp, rtx target,
1453                               machine_mode mode, machine_mode tmode)
1454 {
1455   struct expand_operand ops[4];
       /* SPEC_TARGET is the target we would like to return directly.
          SPEC_TARGET_SUBREG is set when the instruction's output operand
          is a wider lowpart of SPEC_TARGET, so that a result produced
          there can also be returned as SPEC_TARGET.  */
1456   rtx spec_target = target;
1457   rtx spec_target_subreg = 0;
1458   machine_mode ext_mode = extv->field_mode;
1459   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1460
       /* Give up on empty fields and on fields wider than EXT_MODE.  */
1461   if (bitsize == 0 || unit < bitsize)
1462     return NULL_RTX;
1463
1464   if (MEM_P (op0))
1465     /* Get a reference to the first byte of the field.  */
1466     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1467                                 &bitnum);
1468   else
1469     {
1470       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1471       if (BYTES_BIG_ENDIAN)
1472         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1473
1474       /* If op0 is a register, we need it in EXT_MODE to make it
1475          acceptable to the format of ext(z)v.  */
1476       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1477         return NULL_RTX;
1478       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1479         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1480     }
1481
1482   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1483      "backwards" from the size of the unit we are extracting from.
1484      Otherwise, we count bits from the most significant on a
1485      BYTES/BITS_BIG_ENDIAN machine.  */
1486
1487   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1488     bitnum = unit - bitsize - bitnum;
1489
       /* With no target supplied, extract into a new TMODE register.  */
1490   if (target == 0)
1491     target = spec_target = gen_reg_rtx (tmode);
1492
1493   if (GET_MODE (target) != ext_mode)
1494     {
1495       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1496          between the mode of the extraction (word_mode) and the target
1497          mode.  Instead, create a temporary and use convert_move to set
1498          the target.  */
1499       if (REG_P (target)
1500           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1501         {
1502           target = gen_lowpart (ext_mode, target);
1503           if (GET_MODE_PRECISION (ext_mode)
1504               > GET_MODE_PRECISION (GET_MODE (spec_target)))
1505             spec_target_subreg = target;
1506         }
1507       else
1508         target = gen_reg_rtx (ext_mode);
1509     }
1510
       /* Operands: output, source object, field width, field position.  */
1511   create_output_operand (&ops[0], target, ext_mode);
1512   create_fixed_operand (&ops[1], op0);
1513   create_integer_operand (&ops[2], bitsize);
1514   create_integer_operand (&ops[3], bitnum);
1515   if (maybe_expand_insn (extv->icode, 4, ops))
1516     {
           /* Use the output operand's value after expansion.  Return
              SPEC_TARGET when the result is SPEC_TARGET itself or its
              recorded lowpart; otherwise convert the result to the mode
              the caller asked for.  */
1517       target = ops[0].value;
1518       if (target == spec_target)
1519         return target;
1520       if (target == spec_target_subreg)
1521         return spec_target;
1522       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1523     }
1524   return NULL_RTX;
1525 }
1526
1527 /* A subroutine of extract_bit_field, with the same arguments.
1528    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1529    if we can find no other means of implementing the operation.
1530    If FALLBACK_P is false, return NULL instead.  */
1531
1532 static rtx
1533 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1534                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1535                      machine_mode mode, machine_mode tmode,
1536                      bool reverse, bool fallback_p, rtx *alt_rtl)
1537 {
1538   rtx op0 = str_rtx;
1539   machine_mode int_mode;
1540   machine_mode mode1;
1541
1542   if (tmode == VOIDmode)
1543     tmode = mode;
1544
1545   while (GET_CODE (op0) == SUBREG)
1546     {
1547       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1548       op0 = SUBREG_REG (op0);
1549     }
1550
1551   /* If we have an out-of-bounds access to a register, just return an
1552      uninitialized register of the required mode.  This can occur if the
1553      source code contains an out-of-bounds access to a small array.  */
1554   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1555     return gen_reg_rtx (tmode);
1556
1557   if (REG_P (op0)
1558       && mode == GET_MODE (op0)
1559       && bitnum == 0
1560       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1561     {
1562       if (reverse)
1563         op0 = flip_storage_order (mode, op0);
1564       /* We're trying to extract a full register from itself.  */
1565       return op0;
1566     }
1567
1568   /* First try to check for vector from vector extractions.  */
1569   if (VECTOR_MODE_P (GET_MODE (op0))
1570       && !MEM_P (op0)
1571       && VECTOR_MODE_P (tmode)
1572       && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (tmode))
1573     {
1574       machine_mode new_mode = GET_MODE (op0);
1575       if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1576         {
1577           new_mode = mode_for_vector (GET_MODE_INNER (tmode),
1578                                       GET_MODE_BITSIZE (GET_MODE (op0))
1579                                       / GET_MODE_UNIT_BITSIZE (tmode));
1580           if (!VECTOR_MODE_P (new_mode)
1581               || GET_MODE_SIZE (new_mode) != GET_MODE_SIZE (GET_MODE (op0))
1582               || GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)
1583               || !targetm.vector_mode_supported_p (new_mode))
1584             new_mode = VOIDmode;
1585         }
1586       if (new_mode != VOIDmode
1587           && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1588               != CODE_FOR_nothing)
1589           && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (tmode)
1590               == bitnum / GET_MODE_BITSIZE (tmode)))
1591         {
1592           struct expand_operand ops[3];
1593           machine_mode outermode = new_mode;
1594           machine_mode innermode = tmode;
1595           enum insn_code icode
1596             = convert_optab_handler (vec_extract_optab, outermode, innermode);
1597           unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1598
1599           if (new_mode != GET_MODE (op0))
1600             op0 = gen_lowpart (new_mode, op0);
1601           create_output_operand (&ops[0], target, innermode);
1602           ops[0].target = 1;
1603           create_input_operand (&ops[1], op0, outermode);
1604           create_integer_operand (&ops[2], pos);
1605           if (maybe_expand_insn (icode, 3, ops))
1606             {
1607               if (alt_rtl && ops[0].target)
1608                 *alt_rtl = target;
1609               target = ops[0].value;
1610               if (GET_MODE (target) != mode)
1611                 return gen_lowpart (tmode, target);
1612               return target;
1613             }
1614         }
1615     }
1616
1617   /* See if we can get a better vector mode before extracting.  */
1618   if (VECTOR_MODE_P (GET_MODE (op0))
1619       && !MEM_P (op0)
1620       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1621     {
1622       machine_mode new_mode;
1623
1624       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1625         new_mode = MIN_MODE_VECTOR_FLOAT;
1626       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1627         new_mode = MIN_MODE_VECTOR_FRACT;
1628       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1629         new_mode = MIN_MODE_VECTOR_UFRACT;
1630       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1631         new_mode = MIN_MODE_VECTOR_ACCUM;
1632       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1633         new_mode = MIN_MODE_VECTOR_UACCUM;
1634       else
1635         new_mode = MIN_MODE_VECTOR_INT;
1636
1637       FOR_EACH_MODE_FROM (new_mode, new_mode)
1638         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1639             && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode)
1640             && targetm.vector_mode_supported_p (new_mode))
1641           break;
1642       if (new_mode != VOIDmode)
1643         op0 = gen_lowpart (new_mode, op0);
1644     }
1645
1646   /* Use vec_extract patterns for extracting parts of vectors whenever
1647      available.  */
1648   if (VECTOR_MODE_P (GET_MODE (op0))
1649       && !MEM_P (op0)
1650       && (convert_optab_handler (vec_extract_optab, GET_MODE (op0),
1651                                  GET_MODE_INNER (GET_MODE (op0)))
1652           != CODE_FOR_nothing)
1653       && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1654           == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1655     {
1656       struct expand_operand ops[3];
1657       machine_mode outermode = GET_MODE (op0);
1658       machine_mode innermode = GET_MODE_INNER (outermode);
1659       enum insn_code icode
1660         = convert_optab_handler (vec_extract_optab, outermode, innermode);
1661       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1662
1663       create_output_operand (&ops[0], target, innermode);
1664       ops[0].target = 1;
1665       create_input_operand (&ops[1], op0, outermode);
1666       create_integer_operand (&ops[2], pos);
1667       if (maybe_expand_insn (icode, 3, ops))
1668         {
1669           if (alt_rtl && ops[0].target)
1670             *alt_rtl = target;
1671           target = ops[0].value;
1672           if (GET_MODE (target) != mode)
1673             return gen_lowpart (tmode, target);
1674           return target;
1675         }
1676     }
1677
1678   /* Make sure we are playing with integral modes.  Pun with subregs
1679      if we aren't.  */
1680   {
1681     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1682     if (imode != GET_MODE (op0))
1683       {
1684         if (MEM_P (op0))
1685           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1686         else if (imode != BLKmode)
1687           {
1688             op0 = gen_lowpart (imode, op0);
1689
1690             /* If we got a SUBREG, force it into a register since we
1691                aren't going to be able to do another SUBREG on it.  */
1692             if (GET_CODE (op0) == SUBREG)
1693               op0 = force_reg (imode, op0);
1694           }
1695         else
1696           {
1697             HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1698             rtx mem = assign_stack_temp (GET_MODE (op0), size);
1699             emit_move_insn (mem, op0);
1700             op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1701           }
1702       }
1703   }
1704
1705   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1706      If that's wrong, the solution is to test for it and set TARGET to 0
1707      if needed.  */
1708
1709   /* Get the mode of the field to use for atomic access or subreg
1710      conversion.  */
1711   mode1 = mode;
1712   if (SCALAR_INT_MODE_P (tmode))
1713     {
1714       machine_mode try_mode = mode_for_size (bitsize,
1715                                                   GET_MODE_CLASS (tmode), 0);
1716       if (try_mode != BLKmode)
1717         mode1 = try_mode;
1718     }
1719   gcc_assert (mode1 != BLKmode);
1720
1721   /* Extraction of a full MODE1 value can be done with a subreg as long
1722      as the least significant bit of the value is the least significant
1723      bit of either OP0 or a word of OP0.  */
1724   if (!MEM_P (op0)
1725       && !reverse
1726       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1727       && bitsize == GET_MODE_BITSIZE (mode1)
1728       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1729     {
1730       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1731                                      bitnum / BITS_PER_UNIT);
1732       if (sub)
1733         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1734     }
1735
1736   /* Extraction of a full MODE1 value can be done with a load as long as
1737      the field is on a byte boundary and is sufficiently aligned.  */
1738   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1739     {
1740       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1741       if (reverse)
1742         op0 = flip_storage_order (mode1, op0);
1743       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1744     }
1745
1746   /* Handle fields bigger than a word.  */
1747
1748   if (bitsize > BITS_PER_WORD)
1749     {
1750       /* Here we transfer the words of the field
1751          in the order least significant first.
1752          This is because the most significant word is the one which may
1753          be less than full.  */
1754
1755       const bool backwards = WORDS_BIG_ENDIAN;
1756       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1757       unsigned int i;
1758       rtx_insn *last;
1759
1760       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1761         target = gen_reg_rtx (mode);
1762
1763       /* In case we're about to clobber a base register or something
1764          (see gcc.c-torture/execute/20040625-1.c).   */
1765       if (reg_mentioned_p (target, str_rtx))
1766         target = gen_reg_rtx (mode);
1767
1768       /* Indicate for flow that the entire target reg is being set.  */
1769       emit_clobber (target);
1770
1771       last = get_last_insn ();
1772       for (i = 0; i < nwords; i++)
1773         {
1774           /* If I is 0, use the low-order word in both field and target;
1775              if I is 1, use the next to lowest word; and so on.  */
1776           /* Word number in TARGET to use.  */
1777           unsigned int wordnum
1778             = (backwards
1779                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1780                : i);
1781           /* Offset from start of field in OP0.  */
1782           unsigned int bit_offset = (backwards ^ reverse
1783                                      ? MAX ((int) bitsize - ((int) i + 1)
1784                                             * BITS_PER_WORD,
1785                                             0)
1786                                      : (int) i * BITS_PER_WORD);
1787           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1788           rtx result_part
1789             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1790                                              bitsize - i * BITS_PER_WORD),
1791                                    bitnum + bit_offset, 1, target_part,
1792                                    mode, word_mode, reverse, fallback_p, NULL);
1793
1794           gcc_assert (target_part);
1795           if (!result_part)
1796             {
1797               delete_insns_since (last);
1798               return NULL;
1799             }
1800
1801           if (result_part != target_part)
1802             emit_move_insn (target_part, result_part);
1803         }
1804
1805       if (unsignedp)
1806         {
1807           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1808              need to be zero'd out.  */
1809           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1810             {
1811               unsigned int i, total_words;
1812
1813               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1814               for (i = nwords; i < total_words; i++)
1815                 emit_move_insn
1816                   (operand_subword (target,
1817                                     backwards ? total_words - i - 1 : i,
1818                                     1, VOIDmode),
1819                    const0_rtx);
1820             }
1821           return target;
1822         }
1823
1824       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1825       target = expand_shift (LSHIFT_EXPR, mode, target,
1826                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1827       return expand_shift (RSHIFT_EXPR, mode, target,
1828                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1829     }
1830
1831   /* If OP0 is a multi-word register, narrow it to the affected word.
1832      If the region spans two words, defer to extract_split_bit_field.  */
1833   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1834     {
1835       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1836         {
1837           if (!fallback_p)
1838             return NULL_RTX;
1839           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1840                                             reverse);
1841           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1842         }
1843       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1844                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1845       bitnum %= BITS_PER_WORD;
1846     }
1847
1848   /* From here on we know the desired field is smaller than a word.
1849      If OP0 is a register, it too fits within a word.  */
1850   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1851   extraction_insn extv;
1852   if (!MEM_P (op0)
1853       && !reverse
1854       /* ??? We could limit the structure size to the part of OP0 that
1855          contains the field, with appropriate checks for endianness
1856          and TRULY_NOOP_TRUNCATION.  */
1857       && get_best_reg_extraction_insn (&extv, pattern,
1858                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1859                                        tmode))
1860     {
1861       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1862                                                  unsignedp, target, mode,
1863                                                  tmode);
1864       if (result)
1865         return result;
1866     }
1867
1868   /* If OP0 is a memory, try copying it to a register and seeing if a
1869      cheap register alternative is available.  */
1870   if (MEM_P (op0) & !reverse)
1871     {
1872       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1873                                         tmode))
1874         {
1875           rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1876                                                      bitnum, unsignedp,
1877                                                      target, mode,
1878                                                      tmode);
1879           if (result)
1880             return result;
1881         }
1882
1883       rtx_insn *last = get_last_insn ();
1884
1885       /* Try loading part of OP0 into a register and extracting the
1886          bitfield from that.  */
1887       unsigned HOST_WIDE_INT bitpos;
1888       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1889                                                0, 0, tmode, &bitpos);
1890       if (xop0)
1891         {
1892           xop0 = copy_to_reg (xop0);
1893           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1894                                             unsignedp, target,
1895                                             mode, tmode, reverse, false, NULL);
1896           if (result)
1897             return result;
1898           delete_insns_since (last);
1899         }
1900     }
1901
1902   if (!fallback_p)
1903     return NULL;
1904
1905   /* Find a correspondingly-sized integer field, so we can apply
1906      shifts and masks to it.  */
1907   int_mode = int_mode_for_mode (tmode);
1908   if (int_mode == BLKmode)
1909     int_mode = int_mode_for_mode (mode);
1910   /* Should probably push op0 out to memory and then do a load.  */
1911   gcc_assert (int_mode != BLKmode);
1912
1913   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target,
1914                                     unsignedp, reverse);
1915
1916   /* Complex values must be reversed piecewise, so we need to undo the global
1917      reversal, convert to the complex mode and reverse again.  */
1918   if (reverse && COMPLEX_MODE_P (tmode))
1919     {
1920       target = flip_storage_order (int_mode, target);
1921       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1922       target = flip_storage_order (tmode, target);
1923     }
1924   else
1925     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1926
1927   return target;
1928 }
1929
1930 /* Emit code that reads the bit-field of BITSIZE bits located BITNUM bits
1931    into STR_RTX and return an rtx holding the value.  TARGET, when
1932    nonzero, is only a suggestion for where to put the result; callers
1933    must always use the returned rtx.
1934
1935    STR_RTX is the object containing the field (a REG or MEM).
1936    UNSIGNEDP is nonzero for an unsigned field.
1937    MODE is the natural mode of the extracted value.
1938    TMODE is the mode the caller would prefer; the result may
1939    nevertheless come back in MODE.
1940    REVERSE requests extraction in reverse storage order.
1941    ALT_RTL is handed through unchanged to extract_bit_field_1.
1942
1943    When strict_volatile_bitfield_p says -fstrict-volatile-bitfields
1944    applies, the containing unit is accessed in the mode of STR_RTX
1945    (or, failing that, of TARGET or TMODE); the remaining work, and
1946    every other case, goes to extract_bit_field_1 with fallback on.  */
1947
1948 rtx
1949 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1950                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1951                    machine_mode mode, machine_mode tmode, bool reverse,
1952                    rtx *alt_rtl)
1953 {
1954   /* Mode for the -fstrict-volatile-bitfields test: STR_RTX's mode if it
1955      has a nonzero size, else TARGET's on the same condition, else TMODE.  */
1956   machine_mode access_mode = tmode;
1957   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1958     access_mode = GET_MODE (str_rtx);
1959   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1960     access_mode = GET_MODE (target);
1961
1962   if (!strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, access_mode,
1963                                    0, 0))
1964     return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1965                                 target, mode, tmode, reverse, true, alt_rtl);
1966
1967   if (bitsize == GET_MODE_BITSIZE (access_mode))
1968     {
1969       /* The field fills ACCESS_MODE, so a plain load (possibly an
1970          unaligned one on targets that allow it) fetches it.  */
1971       rtx loaded = adjust_bitfield_address (str_rtx, access_mode,
1972                                             bitnum / BITS_PER_UNIT);
1973       if (reverse)
1974         loaded = flip_storage_order (access_mode, loaded);
1975       gcc_assert (bitnum % BITS_PER_UNIT == 0);
1976       return convert_extracted_bit_field (loaded, mode, tmode, unsignedp);
1977     }
1978
1979   /* Narrow the access to the ACCESS_MODE unit holding the field and load
1980      that unit into a register; the field is then taken from the register.  */
1981   rtx unit = narrow_bit_field_mem (str_rtx, access_mode, bitsize, bitnum,
1982                                    &bitnum);
1983   gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (access_mode));
1984   unit = copy_to_reg (unit);
1985
1986   return extract_bit_field_1 (unit, bitsize, bitnum, unsignedp,
1987                               target, mode, tmode, reverse, true, alt_rtl);
1988 }
1989 \f
1990 /* Extract the BITSIZE-bit field that starts at bit BITNUM of OP0 using
1991    shifts and boolean operations only.
1992
1993    UNSIGNEDP is nonzero for an unsigned bit field (no sign extension).
1994    REVERSE asks for the extraction to be done in reverse order.
1995
1996    TARGET, if nonzero, is a suggested home for the value; the value may
1997    end up elsewhere, so use the returned rtx.  When TARGET is not used
1998    the value is placed in a pseudo-reg of mode TMODE.  */
1999
2000 static rtx
2001 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2002                          unsigned HOST_WIDE_INT bitsize,
2003                          unsigned HOST_WIDE_INT bitnum, rtx target,
2004                          int unsignedp, bool reverse)
2005 {
2006   /* A non-memory operand needs no narrowing.  */
2007   if (!MEM_P (op0))
2008     return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
2009                                       target, unsignedp, reverse);
2010
2011   machine_mode best_mode
2012     = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
2013                      MEM_VOLATILE_P (op0));
2014
2015   /* No mode found; this should only happen when the field spans word
2016      boundaries.  */
2017   if (best_mode == VOIDmode)
2018     return extract_split_bit_field (op0, bitsize, bitnum, unsignedp, reverse);
2019
2020   op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
2021   return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
2022                                     target, unsignedp, reverse);
2023 }
2024
2025 /* Helper function for extract_fixed_bit_field, extracts
2026    the bit field always using the MODE of OP0.

        OP0 must have a scalar integer mode; this is asserted.  BITSIZE,
        BITNUM, TARGET, UNSIGNEDP and REVERSE are as for
        extract_fixed_bit_field.

        An unsigned field is shifted right (logically) by its distance
        from the lsb, converted to TMODE and masked down to BITSIZE bits.
        A signed field is moved to the top of the narrowest MODE_INT mode
        that holds BITSIZE + BITNUM bits and then shifted right
        arithmetically; in that case the value is returned in that mode,
        which is not necessarily TMODE.  */
2027
2028 static rtx
2029 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
2030                            unsigned HOST_WIDE_INT bitsize,
2031                            unsigned HOST_WIDE_INT bitnum, rtx target,
2032                            int unsignedp, bool reverse)
2033 {
2034   machine_mode mode = GET_MODE (op0);
2035   gcc_assert (SCALAR_INT_MODE_P (mode));
2036
2037   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2038      for invalid input, such as extract equivalent of f5 from
2039      gcc.dg/pr48335-2.c.  */
2040
2041   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2042     /* BITNUM is the distance between our msb and that of OP0.
2043        Convert it to the distance from the lsb.  */
2044     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2045
2046   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2047      We have reduced the big-endian case to the little-endian case.  */
2048   if (reverse)
2049     op0 = flip_storage_order (mode, op0);
2050
2051   if (unsignedp)
2052     {
2053       if (bitnum)
2054         {
2055           /* If the field does not already start at the lsb,
2056              shift it so it does.  */
2057           /* Maybe propagate the target for the shift.  TARGET is only
                  offered when it is a REG and the shift is done in TMODE.  */
2058           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2059           if (tmode != mode)
2060             subtarget = 0;
2061           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2062         }
2063       /* Convert the value to the desired mode.  */
2064       if (mode != tmode)
2065         op0 = convert_to_mode (tmode, op0, 1);
2066
2067       /* Unless the msb of the field used to be the msb when we shifted,
2068          mask out the upper bits.  */
2069
2070       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2071         return expand_binop (GET_MODE (op0), and_optab, op0,
2072                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
2073                              target, 1, OPTAB_LIB_WIDEN);
2074       return op0;
2075     }
2076
2077   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2078      then arithmetic-shift its lsb to the lsb of the word.  */
2079   op0 = force_reg (mode, op0);
2080
2081   /* Find the narrowest integer mode that contains the field.  MODE is
          reused as the iteration variable, so from here on it names the
          mode in which the shifts are done.  */
2082
2083   FOR_EACH_MODE_IN_CLASS (mode, MODE_INT)
2084     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
2085       {
2086         op0 = convert_to_mode (mode, op0, 0);
2087         break;
2088       }
2089
       /* Only offer TARGET to the shifts when they are done in TMODE.  */
2090   if (mode != tmode)
2091     target = 0;
2092
2093   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2094     {
           /* Number of bits above the field's msb within MODE.  */
2095       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2096       /* Maybe propagate the target for the shift.  */
2097       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2098       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2099     }
2100
       /* The field's msb is now MODE's msb; the arithmetic right shift
          brings the lsb down to bit 0 and sign-extends.  */
2101   return expand_shift (RSHIFT_EXPR, mode, op0,
2102                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2103 }
2104
2105 /* Return a constant integer rtx of mode MODE whose value is
2106    VALUE << BITPOS.  The shift is computed with wi::lshift and the rtx
        is built by immed_wide_int_const.  */
2107
2108 static rtx
2109 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2110               int bitpos)
2111 {
2112   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2113 }
2114 \f
2115 /* Extract a bit field that is split across two words
2116    and return an RTX for the result.
2117
2118    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2119    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2120    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2121
2122    If REVERSE is true, the extraction is to be done in reverse order.

        The field is read in pieces that each stay within one UNIT (a word
        for registers, at most the MEM's alignment for memory).  Every
        piece is extracted unsigned in word_mode, shifted to its place and
        ORed into the result, which is therefore a word_mode value.  */
2123
2124 static rtx
2125 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2126                          unsigned HOST_WIDE_INT bitpos, int unsignedp,
2127                          bool reverse)
2128 {
2129   unsigned int unit;
2130   unsigned int bitsdone = 0;
2131   rtx result = NULL_RTX;
2132   int first = 1;
2133
2134   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2135      much at a time.  */
2136   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2137     unit = BITS_PER_WORD;
2138   else
2139     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2140
2141   while (bitsdone < bitsize)
2142     {
2143       unsigned HOST_WIDE_INT thissize;
2144       rtx part, word;
2145       unsigned HOST_WIDE_INT thispos;
2146       unsigned HOST_WIDE_INT offset;
2147
           /* OFFSET is the index of the unit holding the next unread bit
              and THISPOS that bit's position within the unit.  */
2148       offset = (bitpos + bitsdone) / unit;
2149       thispos = (bitpos + bitsdone) % unit;
2150
2151       /* THISSIZE must not overrun a word boundary.  Otherwise,
2152          extract_fixed_bit_field will call us again, and we will mutually
2153          recurse forever.  */
2154       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2155       thissize = MIN (thissize, unit - thispos);
2156
2157       /* If OP0 is a register, then handle OFFSET here.  */
2158       if (SUBREG_P (op0) || REG_P (op0))
2159         {
2160           word = operand_subword_force (op0, offset, GET_MODE (op0));
2161           offset = 0;
2162         }
2163       else
2164         word = op0;
2165
2166       /* Extract the parts in bit-counting order,
2167          whose meaning is determined by BYTES_PER_UNIT.
2168          OFFSET is in UNITs, and UNIT is in bits.
              NOTE(review): BYTES_PER_UNIT is not used anywhere in this
              function; the placement below is chosen from BYTES_BIG_ENDIAN
              and REVERSE, so BYTES_BIG_ENDIAN was presumably meant --
              confirm.  */
2169       part = extract_fixed_bit_field (word_mode, word, thissize,
2170                                       offset * unit + thispos, 0, 1, reverse);
2171       bitsdone += thissize;
2172
2173       /* Shift this part into place for the result.  In the first branch
              the earliest piece lands in the highest bits of the field; in
              the second it lands in the lowest.  */
2174       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2175         {
2176           if (bitsize != bitsdone)
2177             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2178                                  bitsize - bitsdone, 0, 1);
2179         }
2180       else
2181         {
2182           if (bitsdone != thissize)
2183             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2184                                  bitsdone - thissize, 0, 1);
2185         }
2186
2187       if (first)
2188         result = part;
2189       else
2190         /* Combine the parts with bitwise or.  This works
2191            because we extracted each part as an unsigned bit field.  */
2192         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2193                                OPTAB_LIB_WIDEN);
2194
2195       first = 0;
2196     }
2197
2198   /* Unsigned bit field: we are done.  */
2199   if (unsignedp)
2200     return result;
2201   /* Signed bit field: sign-extend with two arithmetic shifts.
          NOTE(review): the shift count BITS_PER_WORD - BITSIZE presumably
          relies on callers passing BITSIZE <= BITS_PER_WORD -- confirm.  */
2202   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2203                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2204   return expand_shift (RSHIFT_EXPR, word_mode, result,
2205                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2206 }
2207 \f
2208 /* Compute an rvalue of mode MODE holding the low bits of SRC, keeping
2209    the bit pattern intact.  SRC_MODE is the mode of SRC; when it is
2210    narrower than MODE the extra high bits are filled with zeros.
2211    Return null when the layout of either mode is unknown (as for CC
2212    modes) or when the extraction would need unprofitable mode punning;
2213    otherwise return the value.
2214
2215    How this differs from the gen_lowpart* routines:
2216
2217      - the result may only ever be used as an rvalue
2218
2219      - a MODE wider than SRC_MODE implies a zero extension
2220
2221
2222      - a MODE narrower than SRC_MODE implies a truncation, which is
2223        therefore subject to TRULY_NOOP_TRUNCATION.
2224
2225    Put differently, this routine carries out a computation, while the
2226    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2227    operations.  */
2228
2229 rtx
2230 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2231 {
2232   if (mode == src_mode)
2233     return src;
2234
2235   if (CONSTANT_P (src))
2236     {
2237       /* Avoid simplify_gen_subreg: when simplify_subreg cannot fold the
2238          constant it would happily build an invalid SUBREG such as
2239          (subreg (symbol_ref)).  */
2240       unsigned int low_offset = subreg_lowpart_offset (mode, src_mode);
2241       rtx folded = simplify_subreg (mode, src, src_mode, low_offset);
2242       if (folded)
2243         return folded;
2244
2245       /* Fall back to a SUBREG of the constant forced into a register,
2246          if the constant has a mode and such a SUBREG is valid.  */
2247       if (GET_MODE (src) != VOIDmode
2248           && validate_subreg (mode, src_mode, src, low_offset))
2249         {
2250           rtx reg = force_reg (GET_MODE (src), src);
2251           return gen_rtx_SUBREG (mode, reg, low_offset);
2252         }
2253       return NULL_RTX;
2254     }
2255
2256   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2257     return NULL_RTX;
2258
2259   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2260       && MODES_TIEABLE_P (mode, src_mode))
2261     {
2262       rtx low = gen_lowpart_common (mode, src);
2263       if (low)
2264         return low;
2265     }
2266
2267   /* Go through the modes int_mode_for_mode gives for SRC_MODE and MODE,
2268      giving up if either is BLKmode or cannot be tied to its original.  */
2269   machine_mode src_int_mode = int_mode_for_mode (src_mode);
2270   machine_mode int_mode = int_mode_for_mode (mode);
2271   if (src_int_mode == BLKmode
2272       || int_mode == BLKmode
2273       || !MODES_TIEABLE_P (src_int_mode, src_mode)
2274       || !MODES_TIEABLE_P (int_mode, mode))
2275     return NULL_RTX;
2276
2277   rtx int_src = gen_lowpart (src_int_mode, src);
2278   int_src = convert_modes (int_mode, src_int_mode, int_src, true);
2279   return gen_lowpart (mode, int_src);
2280 }
2281 \f
2282 /* Emit code that adds INC to TARGET, storing the sum back in TARGET.  */
2283
2284 void
2285 expand_inc (rtx target, rtx inc)
2286 {
2287   machine_mode target_mode = GET_MODE (target);
2288   rtx sum = expand_binop (target_mode, add_optab, target, inc, target,
2289                           0, OPTAB_LIB_WIDEN);
2290   if (sum != target)
2291     emit_move_insn (target, sum);
2292 }
2293
2294 /* Emit code that subtracts DEC from TARGET, storing back in TARGET.  */
2295
2296 void
2297 expand_dec (rtx target, rtx dec)
2298 {
2299   machine_mode target_mode = GET_MODE (target);
2300   rtx diff = expand_binop (target_mode, sub_optab, target, dec, target,
2301                            0, OPTAB_LIB_WIDEN);
2302   if (diff != target)
2303     emit_move_insn (target, diff);
2304 }
2305 \f
2306 /* Output a shift or rotate instruction for expression code CODE,
2307    with SHIFTED being the rtx for the value to shift,
2308    and AMOUNT the rtx for the amount to shift by.
        MODE is the mode in which the operation is done; for a vector MODE
        the bit-count limits used below come from its element mode.
        LSHIFT_EXPR and LROTATE_EXPR work leftwards, LROTATE_EXPR and
        RROTATE_EXPR select a rotate; any other CODE is handled as a
        right shift.
2309    Store the result in the rtx TARGET, if that is convenient.
2310    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2311    Return the rtx for where the value is.
2312    If that cannot be done, abort the compilation unless MAY_FAIL is true,
2313    in which case 0 is returned.  */
2314
2315 static rtx
2316 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2317                 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2318 {
2319   rtx op1, temp = 0;
       /* LEFT is nonzero for left shifts and left rotates; ROTATE is nonzero
          for rotates in either direction.  */
2320   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2321   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
       /* Optabs to use; these are replaced by the vector/vector variants
          below when both MODE and the shift amount are vectors.  */
2322   optab lshift_optab = ashl_optab;
2323   optab rshift_arith_optab = ashr_optab;
2324   optab rshift_uns_optab = lshr_optab;
2325   optab lrotate_optab = rotl_optab;
2326   optab rrotate_optab = rotr_optab;
2327   machine_mode op1_mode;
2328   machine_mode scalar_mode = mode;
2329   int attempt;
2330   bool speed = optimize_insn_for_speed_p ();
2331
2332   if (VECTOR_MODE_P (mode))
2333     scalar_mode = GET_MODE_INNER (mode);
2334   op1 = amount;
2335   op1_mode = GET_MODE (op1);
2336
2337   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2338      shift amount is a vector, use the vector/vector shift patterns.  */
2339   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2340     {
2341       lshift_optab = vashl_optab;
2342       rshift_arith_optab = vashr_optab;
2343       rshift_uns_optab = vlshr_optab;
2344       lrotate_optab = vrotl_optab;
2345       rrotate_optab = vrotr_optab;
2346     }
2347
2348   /* Previously detected shift-counts computed by NEGATE_EXPR
2349      and shifted in the other direction; but that does not work
2350      on all machines.  */
2351
2352   if (SHIFT_COUNT_TRUNCATED)
2353     {
           /* Reduce an out-of-range constant count modulo the bitsize of
              SCALAR_MODE; otherwise look through a lowpart SUBREG whose
              inner and outer modes are both scalar integer modes.  */
2354       if (CONST_INT_P (op1)
2355           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2356               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2357         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2358                        % GET_MODE_BITSIZE (scalar_mode));
2359       else if (GET_CODE (op1) == SUBREG
2360                && subreg_lowpart_p (op1)
2361                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2362                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2363         op1 = SUBREG_REG (op1);
2364     }
2365
2366   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2367      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2368      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2369      amount instead.  */
2370   if (rotate
2371       && CONST_INT_P (op1)
2372       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2373                    GET_MODE_BITSIZE (scalar_mode) - 1))
2374     {
2375       op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2376       left = !left;
2377       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2378     }
2379
2380   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2381      Note that this is not the case for bigger values.  For instance a rotation
2382      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2383      0x04030201 (bswapsi).  */
2384   if (rotate
2385       && CONST_INT_P (op1)
2386       && INTVAL (op1) == BITS_PER_UNIT
2387       && GET_MODE_SIZE (scalar_mode) == 2
2388       && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2389     return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2390                                   unsignedp);
2391
       /* A count of zero leaves the value unchanged.  */
2392   if (op1 == const0_rtx)
2393     return shifted;
2394
2395   /* Check whether it's cheaper to implement a left shift by a constant
2396      bit count by a sequence of additions.  */
2397   if (code == LSHIFT_EXPR
2398       && CONST_INT_P (op1)
2399       && INTVAL (op1) > 0
2400       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2401       && INTVAL (op1) < MAX_BITS_PER_WORD
2402       && (shift_cost (speed, mode, INTVAL (op1))
2403           > INTVAL (op1) * add_cost (speed, mode))
2404       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2405     {
2406       int i;
           /* Each iteration adds the value to itself, i.e. doubles it, so
              INTVAL (op1) iterations implement the left shift.  */
2407       for (i = 0; i < INTVAL (op1); i++)
2408         {
2409           temp = force_reg (mode, shifted);
2410           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2411                                   unsignedp, OPTAB_LIB_WIDEN);
2412         }
2413       return shifted;
2414     }
2415
       /* Make up to three attempts, using OPTAB_DIRECT, OPTAB_WIDEN and
          OPTAB_LIB_WIDEN in turn, and stop at the first one that yields
          a result.  */
2416   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2417     {
2418       enum optab_methods methods;
2419
2420       if (attempt == 0)
2421         methods = OPTAB_DIRECT;
2422       else if (attempt == 1)
2423         methods = OPTAB_WIDEN;
2424       else
2425         methods = OPTAB_LIB_WIDEN;
2426
2427       if (rotate)
2428         {
2429           /* Widening does not work for rotation.  */
2430           if (methods == OPTAB_WIDEN)
2431             continue;
2432           else if (methods == OPTAB_LIB_WIDEN)
2433             {
2434               /* If we have been unable to open-code this by a rotation,
2435                  do it as the IOR of two shifts.  I.e., to rotate A
2436                  by N bits, compute
2437                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2438                  where C is the bitsize of A.
2439
2440                  It is theoretically possible that the target machine might
2441                  not be able to perform either shift and hence we would
2442                  be making two libcalls rather than just the one for the
2443                  shift (similarly if IOR could not be done).  We will allow
2444                  this extremely unlikely lossage to avoid complicating the
2445                  code below.  */
2446
2447               rtx subtarget = target == shifted ? 0 : target;
2448               rtx new_amount, other_amount;
2449               rtx temp1;
2450
2451               new_amount = op1;
2452               if (op1 == const0_rtx)
2453                 return shifted;
2454               else if (CONST_INT_P (op1))
2455                 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2456                                         - INTVAL (op1));
2457               else
2458                 {
                       /* Variable count: the complementary count is
                          (-op1) & (precision - 1), built in op1's mode.  */
2459                   other_amount
2460                     = simplify_gen_unary (NEG, GET_MODE (op1),
2461                                           op1, GET_MODE (op1));
2462                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2463                   other_amount
2464                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2465                                            gen_int_mode (mask, GET_MODE (op1)));
2466                 }
2467
2468               shifted = force_reg (mode, shifted);
2469
                   /* Both halves are requested as logical (unsigned) shifts
                      and then combined with IOR.  */
2470               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2471                                      mode, shifted, new_amount, 0, 1);
2472               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2473                                       mode, shifted, other_amount,
2474                                       subtarget, 1);
2475               return expand_binop (mode, ior_optab, temp, temp1, target,
2476                                    unsignedp, methods);
2477             }
2478
2479           temp = expand_binop (mode,
2480                                left ? lrotate_optab : rrotate_optab,
2481                                shifted, op1, target, unsignedp, methods);
2482         }
2483       else if (unsignedp)
2484         temp = expand_binop (mode,
2485                              left ? lshift_optab : rshift_uns_optab,
2486                              shifted, op1, target, unsignedp, methods);
2487
2488       /* Do arithmetic shifts.
2489          Also, if we are going to widen the operand, we can just as well
2490          use an arithmetic right-shift instead of a logical one.  */
2491       if (temp == 0 && ! rotate
2492           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2493         {
2494           enum optab_methods methods1 = methods;
2495
2496           /* If trying to widen a log shift to an arithmetic shift,
2497              don't accept an arithmetic shift of the same size.  */
2498           if (unsignedp)
2499             methods1 = OPTAB_MUST_WIDEN;
2500
2501           /* Arithmetic shift */
2502
2503           temp = expand_binop (mode,
2504                                left ? lshift_optab : rshift_arith_optab,
2505                                shifted, op1, target, unsignedp, methods1);
2506         }
2507
2508       /* We used to try extzv here for logical right shifts, but that was
2509          only useful for one machine, the VAX, and caused poor code
2510          generation there for lshrdi3, so the code was deleted and a
2511          define_expand for lshrsi3 was added to vax.md.  */
2512     }
2513
       /* A null result is acceptable only when the caller passed MAY_FAIL.  */
2514   gcc_assert (temp != NULL_RTX || may_fail);
2515   return temp;
2516 }
2517
2518 /* Emit a shift (or rotate) of SHIFTED, selected by tree code CODE and
2519    performed in MODE, by the constant bit count AMOUNT.
2520    The result is stored in the rtx TARGET if that is convenient.
2521    A nonzero UNSIGNEDP requests a logical shift, zero an arithmetic one.
2522    Returns the rtx that holds the result; expand_shift_1 is called with
2523    MAY_FAIL left at its default of false.  */
2524
2525 rtx
2526 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2527               int amount, rtx target, int unsignedp)
2528 {
2529   rtx count = GEN_INT (amount);
2530   return expand_shift_1 (code, mode, shifted, count, target, unsignedp);
2531 }
2532
2533 /* As expand_shift, but yield 0 when the shift cannot be generated.  */
2534
2535 static rtx
2536 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2537                     int amount, rtx target, int unsignedp)
2538 {
2539   rtx count = GEN_INT (amount);
2540   return expand_shift_1 (code, mode, shifted, count, target, unsignedp, true);
2541 }
2542
2543 /* Emit a shift (or rotate) of SHIFTED, selected by tree code CODE and
2544    performed in MODE, by a count given as the tree expression AMOUNT,
2545    which is first expanded to rtl here.
2546    The result is stored in the rtx TARGET if that is convenient.
2547    A nonzero UNSIGNEDP requests a logical shift, zero an arithmetic one.
2548    Returns the rtx that holds the result.  */
2549
2550 rtx
2551 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2552                        tree amount, rtx target, int unsignedp)
2553 {
2554   rtx count = expand_normal (amount);
2555   return expand_shift_1 (code, mode, shifted, count, target, unsignedp);
2556 }
2557
2558 \f
2559 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2560                         const struct mult_cost *, machine_mode mode);
2561 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2562                               const struct algorithm *, enum mult_variant);
2563 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2564 static rtx extract_high_half (machine_mode, rtx);
2565 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2566 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2567                                        int, int);
2568 /* Compute the best algorithm for multiplying by T and store it in
2569    *ALG_OUT.  The algorithm must cost less than COST_LIMIT.
2570    If on return ALG_OUT->cost is not less than COST_LIMIT, no algorithm
2571    was found and all other fields of *ALG_OUT are undefined.
2572    MODE is the machine mode of the multiplication.  */
2573
2574 static void
2575 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2576             const struct mult_cost *cost_limit, machine_mode mode)
2577 {
2578   int m;
2579   struct algorithm *alg_in, *best_alg;
2580   struct mult_cost best_cost;
2581   struct mult_cost new_limit;
2582   int op_cost, op_latency;
2583   unsigned HOST_WIDE_INT orig_t = t;
2584   unsigned HOST_WIDE_INT q;
2585   int maxm, hash_index;
2586   bool cache_hit = false;
2587   enum alg_code cache_alg = alg_zero;
2588   bool speed = optimize_insn_for_speed_p ();
2589   machine_mode imode;
2590   struct alg_hash_entry *entry_ptr;
2591
2592   /* Indicate that no algorithm is yet found.  If no algorithm
2593      is found, this value will be returned and indicate failure.  */
2594   alg_out->cost.cost = cost_limit->cost + 1;
2595   alg_out->cost.latency = cost_limit->latency + 1;
2596
       /* A limit that is negative, or zero in cost with no latency left,
          cannot be met by any algorithm.  */
2597   if (cost_limit->cost < 0
2598       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2599     return;
2600
2601   /* Be prepared for vector modes.  */
2602   imode = GET_MODE_INNER (mode);
2603
       /* Every shift count M tried below must be smaller than MAXM.  */
2604   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2605
2606   /* Restrict the bits of "t" to the multiplication's mode.  */
2607   t &= GET_MODE_MASK (imode);
2608
2609   /* t == 1 can be done in zero cost.  */
2610   if (t == 1)
2611     {
2612       alg_out->ops = 1;
2613       alg_out->cost.cost = 0;
2614       alg_out->cost.latency = 0;
2615       alg_out->op[0] = alg_m;
2616       return;
2617     }
2618
2619   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2620      fail now.  */
2621   if (t == 0)
2622     {
2623       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2624         return;
2625       else
2626         {
2627           alg_out->ops = 1;
2628           alg_out->cost.cost = zero_cost (speed);
2629           alg_out->cost.latency = zero_cost (speed);
2630           alg_out->op[0] = alg_zero;
2631           return;
2632         }
2633     }
2634
2635   /* We'll be needing a couple extra algorithm structures now.  */
2636
2637   alg_in = XALLOCA (struct algorithm);
2638   best_alg = XALLOCA (struct algorithm);
2639   best_cost = *cost_limit;
2640
2641   /* Compute the hash index.  */
2642   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2643
2644   /* See if we already know what to do for T.  */
2645   entry_ptr = alg_hash_entry_ptr (hash_index);
2646   if (entry_ptr->t == t
2647       && entry_ptr->mode == mode
2648       && entry_ptr->speed == speed
2649       && entry_ptr->alg != alg_unknown)
2650     {
2651       cache_alg = entry_ptr->alg;
2652
2653       if (cache_alg == alg_impossible)
2654         {
2655           /* The cache tells us that it's impossible to synthesize
2656              multiplication by T within entry_ptr->cost.  */
2657           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2658             /* COST_LIMIT is at least as restrictive as the one
2659                recorded in the hash table, in which case we have no
2660                hope of synthesizing a multiplication.  Just
2661                return.  */
2662             return;
2663
2664           /* If we get here, COST_LIMIT is less restrictive than the
2665              one recorded in the hash table, so we may be able to
2666              synthesize a multiplication.  Proceed as if we didn't
2667              have the cache entry.  */
2668         }
2669       else
2670         {
2671           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2672             /* The cached algorithm shows that this multiplication
2673                requires more cost than COST_LIMIT.  Just return.  This
2674                way, we don't clobber this cache entry with
2675                alg_impossible but retain useful information.  */
2676             return;
2677
2678           cache_hit = true;
2679
               /* Jump straight to the section that handles the cached
                  algorithm; with CACHE_HIT set, each section jumps to
                  `done' once it has run.  */
2680           switch (cache_alg)
2681             {
2682             case alg_shift:
2683               goto do_alg_shift;
2684
2685             case alg_add_t_m2:
2686             case alg_sub_t_m2:
2687               goto do_alg_addsub_t_m2;
2688
2689             case alg_add_factor:
2690             case alg_sub_factor:
2691               goto do_alg_addsub_factor;
2692
2693             case alg_add_t2_m:
2694               goto do_alg_add_t2_m;
2695
2696             case alg_sub_t2_m:
2697               goto do_alg_sub_t2_m;
2698
2699             default:
2700               gcc_unreachable ();
2701             }
2702         }
2703     }
2704
2705   /* If we have a group of zero bits at the low-order part of T, try
2706      multiplying by the remaining bits and then doing a shift.  */
2707
2708   if ((t & 1) == 0)
2709     {
2710     do_alg_shift:
2711       m = ctz_or_zero (t); /* m = number of low zero bits */
2712       if (m < maxm)
2713         {
2714           q = t >> m;
2715           /* The function expand_shift will choose between a shift and
2716              a sequence of additions, so the observed cost is given as
2717              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2718           op_cost = m * add_cost (speed, mode);
2719           if (shift_cost (speed, mode, m) < op_cost)
2720             op_cost = shift_cost (speed, mode, m);
2721           new_limit.cost = best_cost.cost - op_cost;
2722           new_limit.latency = best_cost.latency - op_cost;
2723           synth_mult (alg_in, q, &new_limit, mode);
2724
2725           alg_in->cost.cost += op_cost;
2726           alg_in->cost.latency += op_cost;
2727           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2728             {
2729               best_cost = alg_in->cost;
2730               std::swap (alg_in, best_alg);
2731               best_alg->log[best_alg->ops] = m;
2732               best_alg->op[best_alg->ops] = alg_shift;
2733             }
2734
2735           /* See if treating ORIG_T as a signed number yields a better
2736              sequence.  Try this sequence only for a negative ORIG_T
2737              as it would be useless for a non-negative ORIG_T.  */
2738           if ((HOST_WIDE_INT) orig_t < 0)
2739             {
2740               /* Shift ORIG_T as follows because a right shift of a
2741                  negative-valued signed type is implementation
2742                  defined.  */
2743               q = ~(~orig_t >> m);
2744               /* The function expand_shift will choose between a shift
2745                  and a sequence of additions, so the observed cost is
2746                  given as MIN (m * add_cost(speed, mode),
2747                  shift_cost(speed, mode, m)).  */
2748               op_cost = m * add_cost (speed, mode);
2749               if (shift_cost (speed, mode, m) < op_cost)
2750                 op_cost = shift_cost (speed, mode, m);
2751               new_limit.cost = best_cost.cost - op_cost;
2752               new_limit.latency = best_cost.latency - op_cost;
2753               synth_mult (alg_in, q, &new_limit, mode);
2754
2755               alg_in->cost.cost += op_cost;
2756               alg_in->cost.latency += op_cost;
2757               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2758                 {
2759                   best_cost = alg_in->cost;
2760                   std::swap (alg_in, best_alg);
2761                   best_alg->log[best_alg->ops] = m;
2762                   best_alg->op[best_alg->ops] = alg_shift;
2763                 }
2764             }
2765         }
2766       if (cache_hit)
2767         goto done;
2768     }
2769
2770   /* If we have an odd number, add or subtract one.  */
2771   if ((t & 1) != 0)
2772     {
2773       unsigned HOST_WIDE_INT w;
2774
2775     do_alg_addsub_t_m2:
           /* Leave W at the lowest bit that is clear in T.  */
2776       for (w = 1; (w & t) != 0; w <<= 1)
2777         ;
2778       /* If T was -1, then W will be zero after the loop.  This is another
2779          case where T ends with ...111.  Handling this with (T + 1) and
2780          subtract 1 produces slightly better code and results in algorithm
2781          selection much faster than treating it like the ...0111 case
2782          below.  */
2783       if (w == 0
2784           || (w > 2
2785               /* Reject the case where t is 3.
2786                  Thus we prefer addition in that case.  */
2787               && t != 3))
2788         {
2789           /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */
2790
2791           op_cost = add_cost (speed, mode);
2792           new_limit.cost = best_cost.cost - op_cost;
2793           new_limit.latency = best_cost.latency - op_cost;
2794           synth_mult (alg_in, t + 1, &new_limit, mode);
2795
2796           alg_in->cost.cost += op_cost;
2797           alg_in->cost.latency += op_cost;
2798           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2799             {
2800               best_cost = alg_in->cost;
2801               std::swap (alg_in, best_alg);
2802               best_alg->log[best_alg->ops] = 0;
2803               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2804             }
2805         }
2806       else
2807         {
2808           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */
2809
2810           op_cost = add_cost (speed, mode);
2811           new_limit.cost = best_cost.cost - op_cost;
2812           new_limit.latency = best_cost.latency - op_cost;
2813           synth_mult (alg_in, t - 1, &new_limit, mode);
2814
2815           alg_in->cost.cost += op_cost;
2816           alg_in->cost.latency += op_cost;
2817           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2818             {
2819               best_cost = alg_in->cost;
2820               std::swap (alg_in, best_alg);
2821               best_alg->log[best_alg->ops] = 0;
2822               best_alg->op[best_alg->ops] = alg_add_t_m2;
2823             }
2824         }
2825
2826       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2827          quickly with a - a * n for some appropriate constant n.  */
2828       m = exact_log2 (-orig_t + 1);
2829       if (m >= 0 && m < maxm)
2830         {
2831           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2832           /* If the target has a cheap shift-and-subtract insn use
2833              that in preference to a shift insn followed by a sub insn.
2834              Assume that the shift-and-sub is "atomic" with a latency
2835              equal to its cost, otherwise assume that on superscalar
2836              hardware the shift may be executed concurrently with the
2837              earlier steps in the algorithm.  */
2838           if (shiftsub1_cost (speed, mode, m) <= op_cost)
2839             {
2840               op_cost = shiftsub1_cost (speed, mode, m);
2841               op_latency = op_cost;
2842             }
2843           else
2844             op_latency = add_cost (speed, mode);
2845
2846           new_limit.cost = best_cost.cost - op_cost;
2847           new_limit.latency = best_cost.latency - op_latency;
2848           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2849                       &new_limit, mode);
2850
2851           alg_in->cost.cost += op_cost;
2852           alg_in->cost.latency += op_latency;
2853           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2854             {
2855               best_cost = alg_in->cost;
2856               std::swap (alg_in, best_alg);
2857               best_alg->log[best_alg->ops] = m;
2858               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2859             }
2860         }
2861
2862       if (cache_hit)
2863         goto done;
2864     }
2865
2866   /* Look for factors of t of the form
2867      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2868      If we find such a factor, we can multiply by t using an algorithm that
2869      multiplies by q, shift the result by m and add/subtract it to itself.
2870
2871      We search for large factors first and loop down, even if large factors
2872      are less probable than small; if we find a large factor we will find a
2873      good sequence quickly, and therefore be able to prune (by decreasing
2874      COST_LIMIT) the search.  */
2875
2876  do_alg_addsub_factor:
2877   for (m = floor_log2 (t - 1); m >= 2; m--)
2878     {
2879       unsigned HOST_WIDE_INT d;
2880
           /* On a cache hit only the cached kind of factor (2**m + 1 for
              alg_add_factor, 2**m - 1 for alg_sub_factor) is considered.  */
2881       d = (HOST_WIDE_INT_1U << m) + 1;
2882       if (t % d == 0 && t > d && m < maxm
2883           && (!cache_hit || cache_alg == alg_add_factor))
2884         {
2885           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2886           if (shiftadd_cost (speed, mode, m) <= op_cost)
2887             op_cost = shiftadd_cost (speed, mode, m);
2888
2889           op_latency = op_cost;
2890
2891
2892           new_limit.cost = best_cost.cost - op_cost;
2893           new_limit.latency = best_cost.latency - op_latency;
2894           synth_mult (alg_in, t / d, &new_limit, mode);
2895
2896           alg_in->cost.cost += op_cost;
2897           alg_in->cost.latency += op_latency;
               /* Never report a latency below the cost of this last step.  */
2898           if (alg_in->cost.latency < op_cost)
2899             alg_in->cost.latency = op_cost;
2900           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2901             {
2902               best_cost = alg_in->cost;
2903               std::swap (alg_in, best_alg);
2904               best_alg->log[best_alg->ops] = m;
2905               best_alg->op[best_alg->ops] = alg_add_factor;
2906             }
2907           /* Other factors will have been taken care of in the recursion.  */
2908           break;
2909         }
2910
2911       d = (HOST_WIDE_INT_1U << m) - 1;
2912       if (t % d == 0 && t > d && m < maxm
2913           && (!cache_hit || cache_alg == alg_sub_factor))
2914         {
2915           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2916           if (shiftsub0_cost (speed, mode, m) <= op_cost)
2917             op_cost = shiftsub0_cost (speed, mode, m);
2918
2919           op_latency = op_cost;
2920
2921           new_limit.cost = best_cost.cost - op_cost;
2922           new_limit.latency = best_cost.latency - op_latency;
2923           synth_mult (alg_in, t / d, &new_limit, mode);
2924
2925           alg_in->cost.cost += op_cost;
2926           alg_in->cost.latency += op_latency;
               /* Never report a latency below the cost of this last step.  */
2927           if (alg_in->cost.latency < op_cost)
2928             alg_in->cost.latency = op_cost;
2929           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2930             {
2931               best_cost = alg_in->cost;
2932               std::swap (alg_in, best_alg);
2933               best_alg->log[best_alg->ops] = m;
2934               best_alg->op[best_alg->ops] = alg_sub_factor;
2935             }
2936           break;
2937         }
2938     }
2939   if (cache_hit)
2940     goto done;
2941
2942   /* Try shift-and-add (load effective address) instructions,
2943      i.e. do a*3, a*5, a*9.  */
2944   if ((t & 1) != 0)
2945     {
2946     do_alg_add_t2_m:
2947       q = t - 1;
2948       m = ctz_hwi (q);
2949       if (q && m < maxm)
2950         {
2951           op_cost = shiftadd_cost (speed, mode, m);
2952           new_limit.cost = best_cost.cost - op_cost;
2953           new_limit.latency = best_cost.latency - op_cost;
2954           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2955
2956           alg_in->cost.cost += op_cost;
2957           alg_in->cost.latency += op_cost;
2958           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2959             {
2960               best_cost = alg_in->cost;
2961               std::swap (alg_in, best_alg);
2962               best_alg->log[best_alg->ops] = m;
2963               best_alg->op[best_alg->ops] = alg_add_t2_m;
2964             }
2965         }
2966       if (cache_hit)
2967         goto done;
2968
2969     do_alg_sub_t2_m:
2970       q = t + 1;
2971       m = ctz_hwi (q);
2972       if (q && m < maxm)
2973         {
2974           op_cost = shiftsub0_cost (speed, mode, m);
2975           new_limit.cost = best_cost.cost - op_cost;
2976           new_limit.latency = best_cost.latency - op_cost;
2977           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2978
2979           alg_in->cost.cost += op_cost;
2980           alg_in->cost.latency += op_cost;
2981           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2982             {
2983               best_cost = alg_in->cost;
2984               std::swap (alg_in, best_alg);
2985               best_alg->log[best_alg->ops] = m;
2986               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2987             }
2988         }
2989       if (cache_hit)
2990         goto done;
2991     }
2992
2993  done:
2994   /* If best_cost has not decreased, we have not found any algorithm.  */
2995   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2996     {
2997       /* We failed to find an algorithm.  Record alg_impossible for
2998          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2999          we are asked to find an algorithm for T within the same or
3000          lower COST_LIMIT, we can immediately return to the
3001          caller.  */
3002       entry_ptr->t = t;
3003       entry_ptr->mode = mode;
3004       entry_ptr->speed = speed;
3005       entry_ptr->alg = alg_impossible;
3006       entry_ptr->cost = *cost_limit;
3007       return;
3008     }
3009
3010   /* Cache the result.  */
3011   if (!cache_hit)
3012     {
3013       entry_ptr->t = t;
3014       entry_ptr->mode = mode;
3015       entry_ptr->speed = speed;
3016       entry_ptr->alg = best_alg->op[best_alg->ops];
3017       entry_ptr->cost.cost = best_cost.cost;
3018       entry_ptr->cost.latency = best_cost.latency;
3019     }
3020
3021   /* If we are getting a too long sequence for `struct algorithm'
3022      to record, make this search fail.  */
3023   if (best_alg->ops == MAX_BITS_PER_WORD)
3024     return;
3025
3026   /* Copy the algorithm from temporary space to the space at alg_out.
3027      We avoid using structure assignment because the majority of
3028      best_alg is normally undefined, and this is a critical function.  */
3029   alg_out->ops = best_alg->ops + 1;
3030   alg_out->cost = best_cost;
3031   memcpy (alg_out->op, best_alg->op,
3032           alg_out->ops * sizeof *alg_out->op);
3033   memcpy (alg_out->log, best_alg->log,
3034           alg_out->ops * sizeof *alg_out->log);
3035 }
3036 \f
3037 /* Pick the cheapest of three ways of multiplying a value of mode MODE
3038    by the constant VAL:
3039
3040        - a shift/add sequence for VAL itself;
3041        - a shift/add sequence for -VAL, whose result is then negated;
3042        - a shift/add sequence for VAL - 1, followed by one addition.
3043
3044    The chosen sequence is stored in *ALG and the fixup it needs in *VARIANT.
3045    The result is true only if that sequence costs less than MULT_COST.  */
3046
3047 bool
3048 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3049                      struct algorithm *alg, enum mult_variant *variant,
3050                      int mult_cost)
3051 {
3052   struct algorithm candidate;
3053   struct mult_cost limit;
3054   bool speed = optimize_insn_for_speed_p ();
3055
3056   /* A negative budget can never be met.  */
3057   if (mult_cost < 0)
3058     return false;
3059
3060   /* Cap the budget at a reasonable ceiling: any constant multiplication
3061      can be performed with less than 2 * bits additions.  */
3062   int ceiling = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3063   if (ceiling < mult_cost)
3064     mult_cost = ceiling;
3065
3066   /* First candidate: a sequence for VAL itself (basic_variant).  */
3067   *variant = basic_variant;
3068   limit.latency = mult_cost;
3069   limit.cost = mult_cost;
3070   synth_mult (alg, val, &limit, mode);
3071
3072   /* Second candidate: -VAL followed by a negation.  This works only if
3073      the inverted value actually fits in an `unsigned int'.  */
3074   if (GET_MODE_UNIT_BITSIZE (mode) <= HOST_BITS_PER_INT)
3075     {
3076       int negate_cost = neg_cost (speed, mode);
3077       bool beaten = MULT_COST_LESS (&alg->cost, mult_cost);
3078
3079       /* Search below the best cost so far if there is one, else below
3080          the budget, in both cases leaving room for the negation.  */
3081       limit.cost = (beaten ? alg->cost.cost : mult_cost) - negate_cost;
3082       limit.latency = (beaten ? alg->cost.latency : mult_cost) - negate_cost;
3083
3084       synth_mult (&candidate, -val, &limit, mode);
3085       candidate.cost.latency += negate_cost;
3086       candidate.cost.cost += negate_cost;
3087       if (CHEAPER_MULT_COST (&candidate.cost, &alg->cost))
3088         {
3089           *alg = candidate;
3090           *variant = negate_variant;
3091         }
3092     }
3093
3094   /* Third candidate: VAL - 1 followed by an addition.  This proves very
3095      useful for division-by-constant.  */
3096   int addition_cost = add_cost (speed, mode);
3097   bool within_budget = MULT_COST_LESS (&alg->cost, mult_cost);
3098
3099   /* Same limit rule as above, this time leaving room for the addition.  */
3100   limit.cost = (within_budget ? alg->cost.cost : mult_cost) - addition_cost;
3101   limit.latency
3102     = (within_budget ? alg->cost.latency : mult_cost) - addition_cost;
3103
3104   synth_mult (&candidate, val - 1, &limit, mode);
3105   candidate.cost.latency += addition_cost;
3106   candidate.cost.cost += addition_cost;
3107   if (CHEAPER_MULT_COST (&candidate.cost, &alg->cost))
3108     {
3109       *alg = candidate;
3110       *variant = add_variant;
3111     }
3112
3113   /* Succeed only if the winner is strictly cheaper than the (possibly
3114      capped) budget.  */
3115   return MULT_COST_LESS (&alg->cost, mult_cost);
3116 }
3117
/* A subroutine of expand_mult, used for constant multiplications.
   Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
   convenient.  Use the shift/add sequence described by ALG and apply
   the final fixup specified by VARIANT.

   Return the rtx that holds the product.  While the sequence is being
   emitted, the constant it has synthesized so far is tracked in
   VAL_SO_FAR; before returning, an assertion checks that it agrees
   with VAL in the bits covered by MODE's inner mode.  */

static rtx
expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
                   rtx target, const struct algorithm *alg,
                   enum mult_variant variant)
{
  /* The multiplier that ACCUM currently represents, i.e. ACCUM is
     expected to equal OP0 * VAL_SO_FAR after each step.  */
  unsigned HOST_WIDE_INT val_so_far;
  rtx_insn *insn;
  rtx accum, tem;
  int opno;
  machine_mode nmode;

  /* Avoid referencing memory over and over and invalid sharing
     on SUBREGs.  */
  op0 = force_reg (mode, op0);

  /* ACCUM starts out either as OP0 or as a zero, depending on
     the first operation.  */

  if (alg->op[0] == alg_zero)
    {
      accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
      val_so_far = 0;
    }
  else if (alg->op[0] == alg_m)
    {
      accum = copy_to_mode_reg (mode, op0);
      val_so_far = 1;
    }
  else
    gcc_unreachable ();

  /* Emit the remaining steps of the algorithm, one per iteration.  */
  for (opno = 1; opno < alg->ops; opno++)
    {
      int log = alg->log[opno];
      /* When not optimizing, reuse ACCUM as the (sub)target instead of
         letting each step pick a fresh destination.  */
      rtx shift_subtarget = optimize ? 0 : accum;
      /* Only the last step may write TARGET directly, and only when no
         trailing addition (add_variant) follows and we are not
         optimizing.  */
      rtx add_target
        = (opno == alg->ops - 1 && target != 0 && variant != add_variant
           && !optimize)
          ? target : 0;
      rtx accum_target = optimize ? 0 : accum;
      rtx accum_inner;

      switch (alg->op[opno])
        {
        case alg_shift:
          /* accum = accum << log.  */
          tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
          /* REG_EQUAL note will be attached to the following insn.  */
          emit_move_insn (accum, tem);
          val_so_far <<= log;
          break;

        case alg_add_t_m2:
          /* accum = accum + (op0 << log).  */
          tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
          accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
                                 add_target ? add_target : accum_target);
          val_so_far += HOST_WIDE_INT_1U << log;
          break;

        case alg_sub_t_m2:
          /* accum = accum - (op0 << log).  */
          tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
          accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
                                 add_target ? add_target : accum_target);
          val_so_far -= HOST_WIDE_INT_1U << log;
          break;

        case alg_add_t2_m:
          /* accum = (accum << log) + op0.  */
          accum = expand_shift (LSHIFT_EXPR, mode, accum,
                                log, shift_subtarget, 0);
          accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
                                 add_target ? add_target : accum_target);
          val_so_far = (val_so_far << log) + 1;
          break;

        case alg_sub_t2_m:
          /* accum = (accum << log) - op0.  */
          accum = expand_shift (LSHIFT_EXPR, mode, accum,
                                log, shift_subtarget, 0);
          accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
                                 add_target ? add_target : accum_target);
          val_so_far = (val_so_far << log) - 1;
          break;

        case alg_add_factor:
          /* accum = accum + (accum << log).  */
          tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
          accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
                                 add_target ? add_target : accum_target);
          val_so_far += val_so_far << log;
          break;

        case alg_sub_factor:
          /* accum = (accum << log) - accum.  */
          tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
          accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
                                 (add_target
                                  ? add_target : (optimize ? 0 : tem)));
          val_so_far = (val_so_far << log) - val_so_far;
          break;

        default:
          gcc_unreachable ();
        }

      if (SCALAR_INT_MODE_P (mode))
        {
          /* Write a REG_EQUAL note on the last insn so that we can cse
             multiplication sequences.  Note that if ACCUM is a SUBREG,
             we've set the inner register and must properly indicate that.  */
          tem = op0, nmode = mode;
          accum_inner = accum;
          if (GET_CODE (accum) == SUBREG)
            {
              accum_inner = SUBREG_REG (accum);
              nmode = GET_MODE (accum_inner);
              tem = gen_lowpart (nmode, op0);
            }

          /* The note records that the destination equals
             TEM * VAL_SO_FAR in NMODE.  */
          insn = get_last_insn ();
          set_dst_reg_note (insn, REG_EQUAL,
                            gen_rtx_MULT (nmode, tem,
                                          gen_int_mode (val_so_far, nmode)),
                            accum_inner);
        }
    }

  /* Apply the final fixup and keep VAL_SO_FAR in step with it.  */
  if (variant == negate_variant)
    {
      val_so_far = -val_so_far;
      accum = expand_unop (mode, neg_optab, accum, target, 0);
    }
  else if (variant == add_variant)
    {
      val_so_far = val_so_far + 1;
      accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
    }

  /* Compare only the bits of val and val_so_far that are significant
     in the result mode, to avoid sign-/zero-extension confusion.  */
  nmode = GET_MODE_INNER (mode);
  val &= GET_MODE_MASK (nmode);
  val_so_far &= GET_MODE_MASK (nmode);
  gcc_assert (val == (HOST_WIDE_INT) val_so_far);

  return accum;
}
3265
/* Perform a multiplication and return an rtx for the result.
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
   TARGET is a suggestion for where to store the result (an rtx).
   UNSIGNEDP is passed on to the shift and binop expanders and, when
   zero, allows the trapping (-ftrapv) optabs to be selected for scalar
   integer modes.

   We check specially for a constant integer as OP1.  A constant OP0
   is swapped into OP1 first, so callers need not order the operands
   themselves.  */

rtx
expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
             int unsignedp)
{
  enum mult_variant variant;
  struct algorithm algorithm;
  rtx scalar_op1;
  int max_cost;
  bool speed = optimize_insn_for_speed_p ();
  /* Trapping multiplication is only used for signed scalar integer
     modes when flag_trapv is set.  */
  bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;

  /* Canonicalize so that any constant operand ends up in OP1.  */
  if (CONSTANT_P (op0))
    std::swap (op0, op1);

  /* For vectors, there are several simplifications that can be made if
     all elements of the vector constant are identical.  */
  scalar_op1 = unwrap_const_vec_duplicate (op1);

  if (INTEGRAL_MODE_P (mode))
    {
      rtx fake_reg;
      HOST_WIDE_INT coeff;
      bool is_neg;
      int mode_bitsize;

      /* Trivial multipliers: 0, 1 and -1.  */
      if (op1 == CONST0_RTX (mode))
        return op1;
      if (op1 == CONST1_RTX (mode))
        return op0;
      if (op1 == CONSTM1_RTX (mode))
        return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
                            op0, target, 0);

      /* No shift/add synthesis when a trapping multiply is wanted.  */
      if (do_trapv)
        goto skip_synth;

      /* If mode is integer vector mode, check if the backend supports
         vector lshift (by scalar or vector) at all.  If not, we can't use
         synthesized multiply.  */
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
          && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
          && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
        goto skip_synth;

      /* These are the operations that are potentially turned into
         a sequence of shifts and additions.  */
      mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

      /* synth_mult does an `unsigned int' multiply.  As long as the mode is
         less than or equal in size to `unsigned int' this doesn't matter.
         If the mode is larger than `unsigned int', then synth_mult works
         only if the constant value exactly fits in an `unsigned int' without
         any truncation.  This means that multiplying by negative values does
         not work; results are off by 2^32 on a 32 bit machine.  */
      if (CONST_INT_P (scalar_op1))
        {
          coeff = INTVAL (scalar_op1);
          is_neg = coeff < 0;
        }
#if TARGET_SUPPORTS_WIDE_INT
      else if (CONST_WIDE_INT_P (scalar_op1))
#else
      else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
#endif
        {
          /* A constant wider than a HOST_WIDE_INT is only handled here
             when it is an exact power of two.  */
          int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
          /* Perfect power of 2 (other than 1, which is handled above).  */
          if (shift > 0)
            return expand_shift (LSHIFT_EXPR, mode, op0,
                                 shift, target, unsignedp);
          else
            goto skip_synth;
        }
      else
        goto skip_synth;

      /* We used to test optimize here, on the grounds that it's better to
         produce a smaller program when -O is not used.  But this causes
         such a terrible slowdown sometimes that it seems better to always
         use synth_mult.  */

      /* Special case powers of two.  A negative coefficient in a mode
         wider than a HOST_WIDE_INT is excluded here and handled by the
         negate-afterwards path below.  */
      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
          && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
        return expand_shift (LSHIFT_EXPR, mode, op0,
                             floor_log2 (coeff), target, unsignedp);

      /* Placeholder register used only to build a MULT rtx for costing.  */
      fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);

      /* Attempt to handle multiplication of DImode values by negative
         coefficients, by performing the multiplication by a positive
         multiplier and then inverting the result.  */
      if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
        {
          /* It's safe to use -coeff even for INT_MIN, as the
             result is interpreted as an unsigned coefficient.
             Exclude cost of op0 from max_cost to match the cost
             calculation of the synth_mult.  */
          coeff = -(unsigned HOST_WIDE_INT) coeff;
          max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
                                    mode, speed)
                      - neg_cost (speed, mode));
          /* Nothing left in the budget once the negation is paid for.  */
          if (max_cost <= 0)
            goto skip_synth;

          /* Special case powers of two.  */
          if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
            {
              rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
                                       floor_log2 (coeff), target, unsignedp);
              return expand_unop (mode, neg_optab, temp, target, 0);
            }

          if (choose_mult_variant (mode, coeff, &algorithm, &variant,
                                   max_cost))
            {
              rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
                                            &algorithm, variant);
              return expand_unop (mode, neg_optab, temp, target, 0);
            }
          goto skip_synth;
        }

      /* Exclude cost of op0 from max_cost to match the cost
         calculation of the synth_mult.  */
      max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
      if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
        return expand_mult_const (mode, op0, coeff, target,
                                  &algorithm, variant);
    }
 skip_synth:

  /* Expand x*2.0 as x+x.  */
  if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
      && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
    {
      op0 = force_reg (GET_MODE (op0), op0);
      return expand_binop (mode, add_optab, op0, op0,
                           target, unsignedp, OPTAB_LIB_WIDEN);
    }

  /* This used to use umul_optab if unsigned, but for non-widening multiply
     there is no difference between signed and unsigned.  */
  op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
                      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
  gcc_assert (op0);
  return op0;
}
3422
3423 /* Return a cost estimate for multiplying a register by the given
3424    COEFFicient in the given MODE and SPEED.  */
3425
3426 int
3427 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3428 {
3429   int max_cost;
3430   struct algorithm algorithm;
3431   enum mult_variant variant;
3432
3433   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3434   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3435                            mode, speed);
3436   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3437     return algorithm.cost.cost;
3438   else
3439     return max_cost;
3440 }
3441
3442 /* Perform a widening multiplication and return an rtx for the result.
3443    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3444    TARGET is a suggestion for where to store the result (an rtx).
3445    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3446    or smul_widen_optab.
3447
3448    We check specially for a constant integer as OP1, comparing the
3449    cost of a widening multiply against the cost of a sequence of shifts
3450    and adds.  */
3451
3452 rtx
3453 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3454                       int unsignedp, optab this_optab)
3455 {
3456   bool speed = optimize_insn_for_speed_p ();
3457   rtx cop1;
3458
3459   if (CONST_INT_P (op1)
3460       && GET_MODE (op0) != VOIDmode
3461       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3462                                 this_optab == umul_widen_optab))
3463       && CONST_INT_P (cop1)
3464       && (INTVAL (cop1) >= 0
3465           || HWI_COMPUTABLE_MODE_P (mode)))
3466     {
3467       HOST_WIDE_INT coeff = INTVAL (cop1);
3468       int max_cost;
3469       enum mult_variant variant;
3470       struct algorithm algorithm;
3471
3472       if (coeff == 0)
3473         return CONST0_RTX (mode);
3474
3475       /* Special case powers of two.  */
3476       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3477         {
3478           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3479           return expand_shift (LSHIFT_EXPR, mode, op0,
3480                                floor_log2 (coeff), target, unsignedp);
3481         }
3482
3483       /* Exclude cost of op0 from max_cost to match the cost
3484          calculation of the synth_mult.  */
3485       max_cost = mul_widen_cost (speed, mode);
3486       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3487                                max_cost))
3488         {
3489           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3490           return expand_mult_const (mode, op0, coeff, target,
3491                                     &algorithm, variant);
3492         }
3493     }
3494   return expand_binop (mode, this_optab, op0, op1, target,
3495                        unsignedp, OPTAB_LIB_WIDEN);
3496 }
3497 \f
3498 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3499    replace division by D, and put the least significant N bits of the result
3500    in *MULTIPLIER_PTR and return the most significant bit.
3501
3502    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3503    needed precision is in PRECISION (should be <= N).
3504
3505    PRECISION should be as small as possible so this function can choose
3506    multiplier more freely.
3507
3508    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3509    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3510
3511    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3512    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3513
3514 unsigned HOST_WIDE_INT
3515 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3516                    unsigned HOST_WIDE_INT *multiplier_ptr,
3517                    int *post_shift_ptr, int *lgup_ptr)
3518 {
3519   int lgup, post_shift;
3520   int pow, pow2;
3521
3522   /* lgup = ceil(log2(divisor)); */
3523   lgup = ceil_log2 (d);
3524
3525   gcc_assert (lgup <= n);
3526
3527   pow = n + lgup;
3528   pow2 = n + lgup - precision;
3529
3530   /* mlow = 2^(N + lgup)/d */
3531   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3532   wide_int mlow = wi::udiv_trunc (val, d);
3533
3534   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3535   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3536   wide_int mhigh = wi::udiv_trunc (val, d);
3537
3538   /* If precision == N, then mlow, mhigh exceed 2^N
3539      (but they do not exceed 2^(N+1)).  */
3540
3541   /* Reduce to lowest terms.  */
3542   for (post_shift = lgup; post_shift > 0; post_shift--)
3543     {
3544       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3545                                                        HOST_BITS_PER_WIDE_INT);
3546       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3547                                                        HOST_BITS_PER_WIDE_INT);
3548       if (ml_lo >= mh_lo)
3549         break;
3550
3551       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3552       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3553     }
3554
3555   *post_shift_ptr = post_shift;
3556   *lgup_ptr = lgup;
3557   if (n < HOST_BITS_PER_WIDE_INT)
3558     {
3559       unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3560       *multiplier_ptr = mhigh.to_uhwi () & mask;
3561       return mhigh.to_uhwi () >= mask;
3562     }
3563   else
3564     {
3565       *multiplier_ptr = mhigh.to_uhwi ();
3566       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3567     }
3568 }
3569
/* Return the multiplicative inverse of X modulo 2**N, that is, the Y
   for which X * Y is congruent to 1 (mod 2**N).  X is assumed odd.  */

static unsigned HOST_WIDE_INT
invert_mod2n (unsigned HOST_WIDE_INT x, int n)
{
  /* Mask of the low N bits; the full-width case is handled apart so
     that we never shift by the width of the type.  */
  unsigned HOST_WIDE_INT low_bits = HOST_WIDE_INT_M1U;
  if (n != HOST_BITS_PER_WIDE_INT)
    low_bits = (HOST_WIDE_INT_1U << n) - 1;

  /* For odd X the starting guess Y = X already satisfies
     X * Y == 1 (mod 2^3).  Every refinement step below doubles the
     number of low-order bits of Y that are correct.  */
  unsigned HOST_WIDE_INT inverse = x;
  for (int valid_bits = 3; valid_bits < n; valid_bits *= 2)
    inverse = (inverse * (2 - x * inverse)) & low_bits;  /* Modulo 2^N */

  return inverse;
}
3597
3598 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3599    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3600    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3601    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3602    become signed.
3603
3604    The result is put in TARGET if that is convenient.
3605
3606    MODE is the mode of operation.  */
3607
3608 rtx
3609 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3610                              rtx op1, rtx target, int unsignedp)
3611 {
3612   rtx tem;
3613   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3614
3615   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3616                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3617   tem = expand_and (mode, tem, op1, NULL_RTX);
3618   adj_operand
3619     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3620                      adj_operand);
3621
3622   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3623                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3624   tem = expand_and (mode, tem, op0, NULL_RTX);
3625   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3626                           target);
3627
3628   return target;
3629 }
3630
3631 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3632
3633 static rtx
3634 extract_high_half (machine_mode mode, rtx op)
3635 {
3636   machine_mode wider_mode;
3637
3638   if (mode == word_mode)
3639     return gen_highpart (mode, op);
3640
3641   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3642
3643   wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3644   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3645                      GET_MODE_BITSIZE (mode), 0, 1);
3646   return convert_modes (mode, wider_mode, op, 0);
3647 }
3648
/* Like expmed_mult_highpart, but only consider using a multiplication
   optab.  OP1 is an rtx for the constant operand.

   Several strategies are tried in turn, each only if its estimated
   cost is below MAX_COST.  Return the rtx holding the high part of the
   product in MODE, or 0 if no strategy succeeded.  */

static rtx
expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
                            rtx target, int unsignedp, int max_cost)
{
  /* The constant multiplier re-expressed as a constant of MODE.  */
  rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
  machine_mode wider_mode;
  optab moptab;
  rtx tem;
  int size;
  bool speed = optimize_insn_for_speed_p ();

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));

  wider_mode = GET_MODE_WIDER_MODE (mode).require ();
  size = GET_MODE_BITSIZE (mode);

  /* Firstly, try using a multiplication insn that only generates the needed
     high part of the product, and in the sign flavor of unsignedp.  */
  if (mul_highpart_cost (speed, mode) < max_cost)
    {
      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
                          unsignedp, OPTAB_DIRECT);
      if (tem)
        return tem;
    }

  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
     Need to adjust the result after the multiplication.  The cost test
     charges two shifts and four additions for that adjustment.
     NOTE(review): the size - 1 < BITS_PER_WORD test presumably keeps the
     shift count given to shift_cost within a word -- confirm.  */
  if (size - 1 < BITS_PER_WORD
      && (mul_highpart_cost (speed, mode)
          + 2 * shift_cost (speed, mode, size-1)
          + 4 * add_cost (speed, mode) < max_cost))
    {
      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
                          unsignedp, OPTAB_DIRECT);
      if (tem)
        /* We used the wrong signedness.  Adjust the result.  */
        return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
                                            tem, unsignedp);
    }

  /* Try widening multiplication.  */
  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && mul_widen_cost (speed, wider_mode) < max_cost)
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
                          unsignedp, OPTAB_WIDEN);
      if (tem)
        return extract_high_half (mode, tem);
    }

  /* Try widening the mode and perform a non-widening multiplication.  */
  if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
          < max_cost))
    {
      rtx_insn *insns;
      rtx wop0, wop1;

      /* We need to widen the operands, for example to ensure the
         constant multiplier is correctly sign or zero extended.
         Use a sequence to clean-up any instructions emitted by
         the conversions if things don't work out.  */
      start_sequence ();
      wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
      wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
      tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
                          unsignedp, OPTAB_WIDEN);
      insns = get_insns ();
      end_sequence ();

      /* Only commit the collected insns if the multiply was expanded.  */
      if (tem)
        {
          emit_insn (insns);
          return extract_high_half (mode, tem);
        }
    }

  /* Try widening multiplication of opposite signedness, and adjust.  */
  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_widen_cost (speed, wider_mode)
          + 2 * shift_cost (speed, mode, size-1)
          + 4 * add_cost (speed, mode) < max_cost))
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
                          NULL_RTX, ! unsignedp, OPTAB_WIDEN);
      if (tem != 0)
        {
          tem = extract_high_half (mode, tem);
          /* We used the wrong signedness.  Adjust the result.  */
          return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
                                              target, unsignedp);
        }
    }

  /* Nothing was both supported and cheap enough.  */
  return 0;
}
3755
3756 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3757    putting the high half of the result in TARGET if that is convenient,
3758    and return where the result is.  If the operation can not be performed,
3759    0 is returned.
3760
3761    MODE is the mode of operation and result.
3762
3763    UNSIGNEDP nonzero means unsigned multiply.
3764
3765    MAX_COST is the total allowed cost for the expanded RTL.  */
3766
3767 static rtx
3768 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3769                       rtx target, int unsignedp, int max_cost)
3770 {
3771   machine_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3772   unsigned HOST_WIDE_INT cnst1;
3773   int extra_cost;
3774   bool sign_adjust = false;
3775   enum mult_variant variant;
3776   struct algorithm alg;
3777   rtx tem;
3778   bool speed = optimize_insn_for_speed_p ();
3779
3780   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3781   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3782   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3783
3784   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3785
3786   /* We can't optimize modes wider than BITS_PER_WORD.
3787      ??? We might be able to perform double-word arithmetic if
3788      mode == word_mode, however all the cost calculations in
3789      synth_mult etc. assume single-word operations.  */
3790   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3791     return expmed_mult_highpart_optab (mode, op0, op1, target,
3792                                        unsignedp, max_cost);
3793
3794   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3795
3796   /* Check whether we try to multiply by a negative constant.  */
3797   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3798     {
3799       sign_adjust = true;
3800       extra_cost += add_cost (speed, mode);
3801     }
3802
3803   /* See whether shift/add multiplication is cheap enough.  */
3804   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3805                            max_cost - extra_cost))
3806     {
3807       /* See whether the specialized multiplication optabs are
3808          cheaper than the shift/add version.  */
3809       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3810                                         alg.cost.cost + extra_cost);
3811       if (tem)
3812         return tem;
3813
3814       tem = convert_to_mode (wider_mode, op0, unsignedp);
3815       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3816       tem = extract_high_half (mode, tem);
3817
3818       /* Adjust result for signedness.  */
3819       if (sign_adjust)
3820         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3821
3822       return tem;
3823     }
3824   return expmed_mult_highpart_optab (mode, op0, op1, target,
3825                                      unsignedp, max_cost);
3826 }
3827
3828
3829 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3830
3831 static rtx
3832 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3833 {
3834   rtx result, temp, shift;
3835   rtx_code_label *label;
3836   int logd;
3837   int prec = GET_MODE_PRECISION (mode);
3838
3839   logd = floor_log2 (d);
3840   result = gen_reg_rtx (mode);
3841
3842   /* Avoid conditional branches when they're expensive.  */
3843   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3844       && optimize_insn_for_speed_p ())
3845     {
3846       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3847                                       mode, 0, -1);
3848       if (signmask)
3849         {
3850           HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3851           signmask = force_reg (mode, signmask);
3852           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3853
3854           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3855              which instruction sequence to use.  If logical right shifts
3856              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3857              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3858
3859           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3860           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3861               || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3862                   > COSTS_N_INSNS (2)))
3863             {
3864               temp = expand_binop (mode, xor_optab, op0, signmask,
3865                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3866               temp = expand_binop (mode, sub_optab, temp, signmask,
3867                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3868               temp = expand_binop (mode, and_optab, temp,
3869                                    gen_int_mode (masklow, mode),
3870                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3871               temp = expand_binop (mode, xor_optab, temp, signmask,
3872                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3873               temp = expand_binop (mode, sub_optab, temp, signmask,
3874                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3875             }
3876           else
3877             {
3878               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3879                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3880               signmask = force_reg (mode, signmask);
3881
3882               temp = expand_binop (mode, add_optab, op0, signmask,
3883                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3884               temp = expand_binop (mode, and_optab, temp,
3885                                    gen_int_mode (masklow, mode),
3886                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3887               temp = expand_binop (mode, sub_optab, temp, signmask,
3888                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3889             }
3890           return temp;
3891         }
3892     }
3893
3894   /* Mask contains the mode's signbit and the significant bits of the
3895      modulus.  By including the signbit in the operation, many targets
3896      can avoid an explicit compare operation in the following comparison
3897      against zero.  */
3898   wide_int mask = wi::mask (logd, false, prec);
3899   mask = wi::set_bit (mask, prec - 1);
3900
3901   temp = expand_binop (mode, and_optab, op0,
3902                        immed_wide_int_const (mask, mode),
3903                        result, 1, OPTAB_LIB_WIDEN);
3904   if (temp != result)
3905     emit_move_insn (result, temp);
3906
3907   label = gen_label_rtx ();
3908   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3909
3910   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3911                        0, OPTAB_LIB_WIDEN);
3912
3913   mask = wi::mask (logd, true, prec);
3914   temp = expand_binop (mode, ior_optab, temp,
3915                        immed_wide_int_const (mask, mode),
3916                        result, 1, OPTAB_LIB_WIDEN);
3917   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3918                        0, OPTAB_LIB_WIDEN);
3919   if (temp != result)
3920     emit_move_insn (result, temp);
3921   emit_label (label);
3922   return result;
3923 }
3924
3925 /* Expand signed division of OP0 by a power of two D in mode MODE.
3926    This routine is only called for positive values of D.  */
3927
3928 static rtx
3929 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3930 {
3931   rtx temp;
3932   rtx_code_label *label;
3933   int logd;
3934
3935   logd = floor_log2 (d);
3936
3937   if (d == 2
3938       && BRANCH_COST (optimize_insn_for_speed_p (),
3939                       false) >= 1)
3940     {
3941       temp = gen_reg_rtx (mode);
3942       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3943       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3944                            0, OPTAB_LIB_WIDEN);
3945       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3946     }
3947
3948   if (HAVE_conditional_move
3949       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3950     {
3951       rtx temp2;
3952
3953       start_sequence ();
3954       temp2 = copy_to_mode_reg (mode, op0);
3955       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3956                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3957       temp = force_reg (mode, temp);
3958
3959       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3960       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3961                                      mode, temp, temp2, mode, 0);
3962       if (temp2)
3963         {
3964           rtx_insn *seq = get_insns ();
3965           end_sequence ();
3966           emit_insn (seq);
3967           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3968         }
3969       end_sequence ();
3970     }
3971
3972   if (BRANCH_COST (optimize_insn_for_speed_p (),
3973                    false) >= 2)
3974     {
3975       int ushift = GET_MODE_BITSIZE (mode) - logd;
3976
3977       temp = gen_reg_rtx (mode);
3978       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3979       if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3980           || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3981              > COSTS_N_INSNS (1))
3982         temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3983                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3984       else
3985         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3986                              ushift, NULL_RTX, 1);
3987       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3988                            0, OPTAB_LIB_WIDEN);
3989       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3990     }
3991
3992   label = gen_label_rtx ();
3993   temp = copy_to_mode_reg (mode, op0);
3994   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3995   expand_inc (temp, gen_int_mode (d - 1, mode));
3996   emit_label (label);
3997   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3998 }
3999 \f
4000 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4001    if that is convenient, and returning where the result is.
4002    You may request either the quotient or the remainder as the result;
4003    specify REM_FLAG nonzero to get the remainder.
4004
4005    CODE is the expression code for which kind of division this is;
4006    it controls how rounding is done.  MODE is the machine mode to use.
4007    UNSIGNEDP nonzero means do unsigned division.  */
4008
4009 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4010    and then correct it by or'ing in missing high bits
4011    if result of ANDI is nonzero.
4012    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4013    This could optimize to a bfexts instruction.
4014    But C doesn't use these operations, so their optimizations are
4015    left for later.  */
4016 /* ??? For modulo, we don't actually need the highpart of the first product,
4017    the low part will do nicely.  And for small divisors, the second multiply
4018    can also be a low-part only multiply or even be completely left out.
4019    E.g. to calculate the remainder of a division by 3 with a 32 bit
4020    multiply, multiply with 0x55555556 and extract the upper two bits;
4021    the result is exact for inputs up to 0x1fffffff.
4022    The input range can be reduced by using cross-sum rules.
4023    For odd divisors >= 3, the following table gives right shift counts
4024    so that if a number is shifted by an integer multiple of the given
4025    amount, the remainder stays the same:
4026    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4027    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4028    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4029    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4030    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4031
4032    Cross-sum rules for even numbers can be derived by leaving as many bits
4033    to the right alone as the divisor has zeros to the right.
4034    E.g. if x is an unsigned 32 bit number:
4035    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4036    */
4037
4038 rtx
4039 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4040                rtx op0, rtx op1, rtx target, int unsignedp)
4041 {
4042   machine_mode compute_mode;
4043   rtx tquotient;
4044   rtx quotient = 0, remainder = 0;
4045   rtx_insn *last;
4046   int size;
4047   rtx_insn *insn;
4048   optab optab1, optab2;
4049   int op1_is_constant, op1_is_pow2 = 0;
4050   int max_cost, extra_cost;
4051   static HOST_WIDE_INT last_div_const = 0;
4052   bool speed = optimize_insn_for_speed_p ();
4053
4054   op1_is_constant = CONST_INT_P (op1);
4055   if (op1_is_constant)
4056     {
4057       wide_int ext_op1 = rtx_mode_t (op1, mode);
4058       op1_is_pow2 = (wi::popcount (ext_op1) == 1
4059                      || (! unsignedp
4060                          && wi::popcount (wi::neg (ext_op1)) == 1));
4061     }
4062
4063   /*
4064      This is the structure of expand_divmod:
4065
4066      First comes code to fix up the operands so we can perform the operations
4067      correctly and efficiently.
4068
4069      Second comes a switch statement with code specific for each rounding mode.
4070      For some special operands this code emits all RTL for the desired
4071      operation, for other cases, it generates only a quotient and stores it in
4072      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4073      to indicate that it has not done anything.
4074
4075      Last comes code that finishes the operation.  If QUOTIENT is set and
4076      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4077      QUOTIENT is not set, it is computed using trunc rounding.
4078
4079      We try to generate special code for division and remainder when OP1 is a
4080      constant.  If |OP1| = 2**n we can use shifts and some other fast
4081      operations.  For other values of OP1, we compute a carefully selected
4082      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4083      by m.
4084
4085      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4086      half of the product.  Different strategies for generating the product are
4087      implemented in expmed_mult_highpart.
4088
4089      If what we actually want is the remainder, we generate that by another
4090      by-constant multiplication and a subtraction.  */
4091
4092   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4093      code below will malfunction if we are, so check here and handle
4094      the special case if so.  */
4095   if (op1 == const1_rtx)
4096     return rem_flag ? const0_rtx : op0;
4097
4098     /* When dividing by -1, we could get an overflow.
4099      negv_optab can handle overflows.  */
4100   if (! unsignedp && op1 == constm1_rtx)
4101     {
4102       if (rem_flag)
4103         return const0_rtx;
4104       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4105                           ? negv_optab : neg_optab, op0, target, 0);
4106     }
4107
4108   if (target
4109       /* Don't use the function value register as a target
4110          since we have to read it as well as write it,
4111          and function-inlining gets confused by this.  */
4112       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4113           /* Don't clobber an operand while doing a multi-step calculation.  */
4114           || ((rem_flag || op1_is_constant)
4115               && (reg_mentioned_p (target, op0)
4116                   || (MEM_P (op0) && MEM_P (target))))
4117           || reg_mentioned_p (target, op1)
4118           || (MEM_P (op1) && MEM_P (target))))
4119     target = 0;
4120
4121   /* Get the mode in which to perform this computation.  Normally it will
4122      be MODE, but sometimes we can't do the desired operation in MODE.
4123      If so, pick a wider mode in which we can do the operation.  Convert
4124      to that mode at the start to avoid repeated conversions.
4125
4126      First see what operations we need.  These depend on the expression
4127      we are evaluating.  (We assume that divxx3 insns exist under the
4128      same conditions that modxx3 insns and that these insns don't normally
4129      fail.  If these assumptions are not correct, we may generate less
4130      efficient code in some cases.)
4131
4132      Then see if we find a mode in which we can open-code that operation
4133      (either a division, modulus, or shift).  Finally, check for the smallest
4134      mode for which we can do the operation with a library call.  */
4135
4136   /* We might want to refine this now that we have division-by-constant
4137      optimization.  Since expmed_mult_highpart tries so many variants, it is
4138      not straightforward to generalize this.  Maybe we should make an array
4139      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4140
4141   optab1 = (op1_is_pow2
4142             ? (unsignedp ? lshr_optab : ashr_optab)
4143             : (unsignedp ? udiv_optab : sdiv_optab));
4144   optab2 = (op1_is_pow2 ? optab1
4145             : (unsignedp ? udivmod_optab : sdivmod_optab));
4146
4147   FOR_EACH_MODE_FROM (compute_mode, mode)
4148     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4149         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4150       break;
4151
4152   if (compute_mode == VOIDmode)
4153     FOR_EACH_MODE_FROM (compute_mode, mode)
4154       if (optab_libfunc (optab1, compute_mode)
4155           || optab_libfunc (optab2, compute_mode))
4156         break;
4157
4158   /* If we still couldn't find a mode, use MODE, but expand_binop will
4159      probably die.  */
4160   if (compute_mode == VOIDmode)
4161     compute_mode = mode;
4162
4163   if (target && GET_MODE (target) == compute_mode)
4164     tquotient = target;
4165   else
4166     tquotient = gen_reg_rtx (compute_mode);
4167
4168   size = GET_MODE_BITSIZE (compute_mode);
4169 #if 0
4170   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4171      (mode), and thereby get better code when OP1 is a constant.  Do that
4172      later.  It will require going over all usages of SIZE below.  */
4173   size = GET_MODE_BITSIZE (mode);
4174 #endif
4175
4176   /* Only deduct something for a REM if the last divide done was
4177      for a different constant.   Then set the constant of the last
4178      divide.  */
4179   max_cost = (unsignedp
4180               ? udiv_cost (speed, compute_mode)
4181               : sdiv_cost (speed, compute_mode));
4182   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4183                      && INTVAL (op1) == last_div_const))
4184     max_cost -= (mul_cost (speed, compute_mode)
4185                  + add_cost (speed, compute_mode));
4186
4187   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4188
4189   /* Now convert to the best mode to use.  */
4190   if (compute_mode != mode)
4191     {
4192       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4193       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4194
4195       /* convert_modes may have placed op1 into a register, so we
4196          must recompute the following.  */
4197       op1_is_constant = CONST_INT_P (op1);
4198       if (op1_is_constant)
4199         {
4200           wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4201           op1_is_pow2 = (wi::popcount (ext_op1) == 1
4202                          || (! unsignedp
4203                              && wi::popcount (wi::neg (ext_op1)) == 1));
4204         }
4205       else
4206         op1_is_pow2 = 0;
4207     }
4208
4209   /* If one of the operands is a volatile MEM, copy it into a register.  */
4210
4211   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4212     op0 = force_reg (compute_mode, op0);
4213   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4214     op1 = force_reg (compute_mode, op1);
4215
4216   /* If we need the remainder or if OP1 is constant, we need to
4217      put OP0 in a register in case it has any queued subexpressions.  */
4218   if (rem_flag || op1_is_constant)
4219     op0 = force_reg (compute_mode, op0);
4220
4221   last = get_last_insn ();
4222
4223   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4224   if (unsignedp)
4225     {
4226       if (code == FLOOR_DIV_EXPR)
4227         code = TRUNC_DIV_EXPR;
4228       if (code == FLOOR_MOD_EXPR)
4229         code = TRUNC_MOD_EXPR;
4230       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4231         code = TRUNC_DIV_EXPR;
4232     }
4233
4234   if (op1 != const0_rtx)
4235     switch (code)
4236       {
4237       case TRUNC_MOD_EXPR:
4238       case TRUNC_DIV_EXPR:
4239         if (op1_is_constant)
4240           {
4241             if (unsignedp)
4242               {
4243                 unsigned HOST_WIDE_INT mh, ml;
4244                 int pre_shift, post_shift;
4245                 int dummy;
4246                 wide_int wd = rtx_mode_t (op1, compute_mode);
4247                 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4248
4249                 if (wi::popcount (wd) == 1)
4250                   {
4251                     pre_shift = floor_log2 (d);
4252                     if (rem_flag)
4253                       {
4254                         unsigned HOST_WIDE_INT mask
4255                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4256                         remainder
4257                           = expand_binop (compute_mode, and_optab, op0,
4258                                           gen_int_mode (mask, compute_mode),
4259                                           remainder, 1,
4260                                           OPTAB_LIB_WIDEN);
4261                         if (remainder)
4262                           return gen_lowpart (mode, remainder);
4263                       }
4264                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4265                                              pre_shift, tquotient, 1);
4266                   }
4267                 else if (size <= HOST_BITS_PER_WIDE_INT)
4268                   {
4269                     if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4270                       {
4271                         /* Most significant bit of divisor is set; emit an scc
4272                            insn.  */
4273                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4274                                                           compute_mode, 1, 1);
4275                       }
4276                     else
4277                       {
4278                         /* Find a suitable multiplier and right shift count
4279                            instead of multiplying with D.  */
4280
4281                         mh = choose_multiplier (d, size, size,
4282                                                 &ml, &post_shift, &dummy);
4283
4284                         /* If the suggested multiplier is more than SIZE bits,
4285                            we can do better for even divisors, using an
4286                            initial right shift.  */
4287                         if (mh != 0 && (d & 1) == 0)
4288                           {
4289                             pre_shift = ctz_or_zero (d);
4290                             mh = choose_multiplier (d >> pre_shift, size,
4291                                                     size - pre_shift,
4292                                                     &ml, &post_shift, &dummy);
4293                             gcc_assert (!mh);
4294                           }
4295                         else
4296                           pre_shift = 0;
4297
4298                         if (mh != 0)
4299                           {
4300                             rtx t1, t2, t3, t4;
4301
4302                             if (post_shift - 1 >= BITS_PER_WORD)
4303                               goto fail1;
4304
4305                             extra_cost
4306                               = (shift_cost (speed, compute_mode, post_shift - 1)
4307                                  + shift_cost (speed, compute_mode, 1)
4308                                  + 2 * add_cost (speed, compute_mode));
4309                             t1 = expmed_mult_highpart
4310                               (compute_mode, op0,
4311                                gen_int_mode (ml, compute_mode),
4312                                NULL_RTX, 1, max_cost - extra_cost);
4313                             if (t1 == 0)
4314                               goto fail1;
4315                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4316                                                                op0, t1),
4317                                                 NULL_RTX);
4318                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4319                                                t2, 1, NULL_RTX, 1);
4320                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4321                                                               t1, t3),
4322                                                 NULL_RTX);
4323                             quotient = expand_shift
4324                               (RSHIFT_EXPR, compute_mode, t4,
4325                                post_shift - 1, tquotient, 1);
4326                           }
4327                         else
4328                           {
4329                             rtx t1, t2;
4330
4331                             if (pre_shift >= BITS_PER_WORD
4332                                 || post_shift >= BITS_PER_WORD)
4333                               goto fail1;
4334
4335                             t1 = expand_shift
4336                               (RSHIFT_EXPR, compute_mode, op0,
4337                                pre_shift, NULL_RTX, 1);
4338                             extra_cost
4339                               = (shift_cost (speed, compute_mode, pre_shift)
4340                                  + shift_cost (speed, compute_mode, post_shift));
4341                             t2 = expmed_mult_highpart
4342                               (compute_mode, t1,
4343                                gen_int_mode (ml, compute_mode),
4344                                NULL_RTX, 1, max_cost - extra_cost);
4345                             if (t2 == 0)
4346                               goto fail1;
4347                             quotient = expand_shift
4348                               (RSHIFT_EXPR, compute_mode, t2,
4349                                post_shift, tquotient, 1);
4350                           }
4351                       }
4352                   }
4353                 else            /* Too wide mode to use tricky code */
4354                   break;
4355
4356                 insn = get_last_insn ();
4357                 if (insn != last)
4358                   set_dst_reg_note (insn, REG_EQUAL,
4359                                     gen_rtx_UDIV (compute_mode, op0, op1),
4360                                     quotient);
4361               }
4362             else                /* TRUNC_DIV, signed */
4363               {
4364                 unsigned HOST_WIDE_INT ml;
4365                 int lgup, post_shift;
4366                 rtx mlr;
4367                 HOST_WIDE_INT d = INTVAL (op1);
4368                 unsigned HOST_WIDE_INT abs_d;
4369
4370                 /* Since d might be INT_MIN, we have to cast to
4371                    unsigned HOST_WIDE_INT before negating to avoid
4372                    undefined signed overflow.  */
4373                 abs_d = (d >= 0
4374                          ? (unsigned HOST_WIDE_INT) d
4375                          : - (unsigned HOST_WIDE_INT) d);
4376
4377                 /* n rem d = n rem -d */
4378                 if (rem_flag && d < 0)
4379                   {
4380                     d = abs_d;
4381                     op1 = gen_int_mode (abs_d, compute_mode);
4382                   }
4383
4384                 if (d == 1)
4385                   quotient = op0;
4386                 else if (d == -1)
4387                   quotient = expand_unop (compute_mode, neg_optab, op0,
4388                                           tquotient, 0);
4389                 else if (size <= HOST_BITS_PER_WIDE_INT
4390                          && abs_d == HOST_WIDE_INT_1U << (size - 1))
4391                   {
4392                     /* This case is not handled correctly below.  */
4393                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4394                                                 compute_mode, 1, 1);
4395                     if (quotient == 0)
4396                       goto fail1;
4397                   }
4398                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4399                          && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4400                          && (rem_flag
4401                              ? smod_pow2_cheap (speed, compute_mode)
4402                              : sdiv_pow2_cheap (speed, compute_mode))
4403                          /* We assume that cheap metric is true if the
4404                             optab has an expander for this mode.  */
4405                          && ((optab_handler ((rem_flag ? smod_optab
4406                                               : sdiv_optab),
4407                                              compute_mode)
4408                               != CODE_FOR_nothing)
4409                              || (optab_handler (sdivmod_optab,
4410                                                 compute_mode)
4411                                  != CODE_FOR_nothing)))
4412                   ;
4413                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)
4414                          && (size <= HOST_BITS_PER_WIDE_INT
4415                              || abs_d != (unsigned HOST_WIDE_INT) d))
4416                   {
4417                     if (rem_flag)
4418                       {
4419                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4420                         if (remainder)
4421                           return gen_lowpart (mode, remainder);
4422                       }
4423
4424                     if (sdiv_pow2_cheap (speed, compute_mode)
4425                         && ((optab_handler (sdiv_optab, compute_mode)
4426                              != CODE_FOR_nothing)
4427                             || (optab_handler (sdivmod_optab, compute_mode)
4428                                 != CODE_FOR_nothing)))
4429                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4430                                                 compute_mode, op0,
4431                                                 gen_int_mode (abs_d,
4432                                                               compute_mode),
4433                                                 NULL_RTX, 0);
4434                     else
4435                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4436
4437                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4438                        negate the quotient.  */
4439                     if (d < 0)
4440                       {
4441                         insn = get_last_insn ();
4442                         if (insn != last
4443                             && abs_d < (HOST_WIDE_INT_1U
4444                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4445                           set_dst_reg_note (insn, REG_EQUAL,
4446                                             gen_rtx_DIV (compute_mode, op0,
4447                                                          gen_int_mode
4448                                                            (abs_d,
4449                                                             compute_mode)),
4450                                             quotient);
4451
4452                         quotient = expand_unop (compute_mode, neg_optab,
4453                                                 quotient, quotient, 0);
4454                       }
4455                   }
4456                 else if (size <= HOST_BITS_PER_WIDE_INT)
4457                   {
4458                     choose_multiplier (abs_d, size, size - 1,
4459                                        &ml, &post_shift, &lgup);
4460                     if (ml < HOST_WIDE_INT_1U << (size - 1))
4461                       {
4462                         rtx t1, t2, t3;
4463
4464                         if (post_shift >= BITS_PER_WORD
4465                             || size - 1 >= BITS_PER_WORD)
4466                           goto fail1;
4467
4468                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4469                                       + shift_cost (speed, compute_mode, size - 1)
4470                                       + add_cost (speed, compute_mode));
4471                         t1 = expmed_mult_highpart
4472                           (compute_mode, op0, gen_int_mode (ml, compute_mode),
4473                            NULL_RTX, 0, max_cost - extra_cost);
4474                         if (t1 == 0)
4475                           goto fail1;
4476                         t2 = expand_shift
4477                           (RSHIFT_EXPR, compute_mode, t1,
4478                            post_shift, NULL_RTX, 0);
4479                         t3 = expand_shift
4480                           (RSHIFT_EXPR, compute_mode, op0,
4481                            size - 1, NULL_RTX, 0);
4482                         if (d < 0)
4483                           quotient
4484                             = force_operand (gen_rtx_MINUS (compute_mode,
4485                                                             t3, t2),
4486                                              tquotient);
4487                         else
4488                           quotient
4489                             = force_operand (gen_rtx_MINUS (compute_mode,
4490                                                             t2, t3),
4491                                              tquotient);
4492                       }
4493                     else
4494                       {
4495                         rtx t1, t2, t3, t4;
4496
4497                         if (post_shift >= BITS_PER_WORD
4498                             || size - 1 >= BITS_PER_WORD)
4499                           goto fail1;
4500
4501                         ml |= HOST_WIDE_INT_M1U << (size - 1);
4502                         mlr = gen_int_mode (ml, compute_mode);
4503                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4504                                       + shift_cost (speed, compute_mode, size - 1)
4505                                       + 2 * add_cost (speed, compute_mode));
4506                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4507                                                    NULL_RTX, 0,
4508                                                    max_cost - extra_cost);
4509                         if (t1 == 0)
4510                           goto fail1;
4511                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4512                                                           t1, op0),
4513                                             NULL_RTX);
4514                         t3 = expand_shift
4515                           (RSHIFT_EXPR, compute_mode, t2,
4516                            post_shift, NULL_RTX, 0);
4517                         t4 = expand_shift
4518                           (RSHIFT_EXPR, compute_mode, op0,
4519                            size - 1, NULL_RTX, 0);
4520                         if (d < 0)
4521                           quotient
4522                             = force_operand (gen_rtx_MINUS (compute_mode,
4523                                                             t4, t3),
4524                                              tquotient);
4525                         else
4526                           quotient
4527                             = force_operand (gen_rtx_MINUS (compute_mode,
4528                                                             t3, t4),
4529                                              tquotient);
4530                       }
4531                   }
4532                 else            /* Too wide mode to use tricky code */
4533                   break;
4534
4535                 insn = get_last_insn ();
4536                 if (insn != last)
4537                   set_dst_reg_note (insn, REG_EQUAL,
4538                                     gen_rtx_DIV (compute_mode, op0, op1),
4539                                     quotient);
4540               }
4541             break;
4542           }
4543       fail1:
4544         delete_insns_since (last);
4545         break;
4546
4547       case FLOOR_DIV_EXPR:
4548       case FLOOR_MOD_EXPR:
4549       /* We will come here only for signed operations.  */
4550         if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4551           {
4552             unsigned HOST_WIDE_INT mh, ml;
4553             int pre_shift, lgup, post_shift;
4554             HOST_WIDE_INT d = INTVAL (op1);
4555
4556             if (d > 0)
4557               {
4558                 /* We could just as easily deal with negative constants here,
4559                    but it does not seem worth the trouble for GCC 2.6.  */
4560                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4561                   {
4562                     pre_shift = floor_log2 (d);
4563                     if (rem_flag)
4564                       {
4565                         unsigned HOST_WIDE_INT mask
4566                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4567                         remainder = expand_binop
4568                           (compute_mode, and_optab, op0,
4569                            gen_int_mode (mask, compute_mode),
4570                            remainder, 0, OPTAB_LIB_WIDEN);
4571                         if (remainder)
4572                           return gen_lowpart (mode, remainder);
4573                       }
4574                     quotient = expand_shift
4575                       (RSHIFT_EXPR, compute_mode, op0,
4576                        pre_shift, tquotient, 0);
4577                   }
4578                 else
4579                   {
4580                     rtx t1, t2, t3, t4;
4581
4582                     mh = choose_multiplier (d, size, size - 1,
4583                                             &ml, &post_shift, &lgup);
4584                     gcc_assert (!mh);
4585
4586                     if (post_shift < BITS_PER_WORD
4587                         && size - 1 < BITS_PER_WORD)
4588                       {
4589                         t1 = expand_shift
4590                           (RSHIFT_EXPR, compute_mode, op0,
4591                            size - 1, NULL_RTX, 0);
4592                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4593                                            NULL_RTX, 0, OPTAB_WIDEN);
4594                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4595                                       + shift_cost (speed, compute_mode, size - 1)
4596                                       + 2 * add_cost (speed, compute_mode));
4597                         t3 = expmed_mult_highpart
4598                           (compute_mode, t2, gen_int_mode (ml, compute_mode),
4599                            NULL_RTX, 1, max_cost - extra_cost);
4600                         if (t3 != 0)
4601                           {
4602                             t4 = expand_shift
4603                               (RSHIFT_EXPR, compute_mode, t3,
4604                                post_shift, NULL_RTX, 1);
4605                             quotient = expand_binop (compute_mode, xor_optab,
4606                                                      t4, t1, tquotient, 0,
4607                                                      OPTAB_WIDEN);
4608                           }
4609                       }
4610                   }
4611               }
4612             else
4613               {
4614                 rtx nsign, t1, t2, t3, t4;
4615                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4616                                                   op0, constm1_rtx), NULL_RTX);
4617                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4618                                    0, OPTAB_WIDEN);
4619                 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
4620                                       size - 1, NULL_RTX, 0);
4621                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4622                                     NULL_RTX);
4623                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4624                                     NULL_RTX, 0);
4625                 if (t4)
4626                   {
4627                     rtx t5;
4628                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4629                                       NULL_RTX, 0);
4630                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4631                                                             t4, t5),
4632                                               tquotient);
4633                   }
4634               }
4635           }
4636
4637         if (quotient != 0)
4638           break;
4639         delete_insns_since (last);
4640
4641         /* Try using an instruction that produces both the quotient and
4642            remainder, using truncation.  We can easily compensate the quotient
4643            or remainder to get floor rounding, once we have the remainder.
4644            Notice that we compute also the final remainder value here,
4645            and return the result right away.  */
4646         if (target == 0 || GET_MODE (target) != compute_mode)
4647           target = gen_reg_rtx (compute_mode);
4648
4649         if (rem_flag)
4650           {
4651             remainder
4652               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4653             quotient = gen_reg_rtx (compute_mode);
4654           }
4655         else
4656           {
4657             quotient
4658               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4659             remainder = gen_reg_rtx (compute_mode);
4660           }
4661
4662         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4663                                  quotient, remainder, 0))
4664           {
4665             /* This could be computed with a branch-less sequence.
4666                Save that for later.  */
4667             rtx tem;
4668             rtx_code_label *label = gen_label_rtx ();
4669             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4670             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4671                                 NULL_RTX, 0, OPTAB_WIDEN);
4672             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4673             expand_dec (quotient, const1_rtx);
4674             expand_inc (remainder, op1);
4675             emit_label (label);
4676             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4677           }
4678
4679         /* No luck with division elimination or divmod.  Have to do it
4680            by conditionally adjusting op0 *and* the result.  */
4681         {
4682           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4683           rtx adjusted_op0;
4684           rtx tem;
4685
4686           quotient = gen_reg_rtx (compute_mode);
4687           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4688           label1 = gen_label_rtx ();
4689           label2 = gen_label_rtx ();
4690           label3 = gen_label_rtx ();
4691           label4 = gen_label_rtx ();
4692           label5 = gen_label_rtx ();
4693           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4694           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4695           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4696                               quotient, 0, OPTAB_LIB_WIDEN);
4697           if (tem != quotient)
4698             emit_move_insn (quotient, tem);
4699           emit_jump_insn (targetm.gen_jump (label5));
4700           emit_barrier ();
4701           emit_label (label1);
4702           expand_inc (adjusted_op0, const1_rtx);
4703           emit_jump_insn (targetm.gen_jump (label4));
4704           emit_barrier ();
4705           emit_label (label2);
4706           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4707           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4708                               quotient, 0, OPTAB_LIB_WIDEN);
4709           if (tem != quotient)
4710             emit_move_insn (quotient, tem);
4711           emit_jump_insn (targetm.gen_jump (label5));
4712           emit_barrier ();
4713           emit_label (label3);
4714           expand_dec (adjusted_op0, const1_rtx);
4715           emit_label (label4);
4716           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4717                               quotient, 0, OPTAB_LIB_WIDEN);
4718           if (tem != quotient)
4719             emit_move_insn (quotient, tem);
4720           expand_dec (quotient, const1_rtx);
4721           emit_label (label5);
4722         }
4723         break;
4724
4725       case CEIL_DIV_EXPR:
4726       case CEIL_MOD_EXPR:
4727         if (unsignedp)
4728           {
4729             if (op1_is_constant
4730                 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4731                 && (size <= HOST_BITS_PER_WIDE_INT
4732                     || INTVAL (op1) >= 0))
4733               {
4734                 rtx t1, t2, t3;
4735                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4736                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4737                                    floor_log2 (d), tquotient, 1);
4738                 t2 = expand_binop (compute_mode, and_optab, op0,
4739                                    gen_int_mode (d - 1, compute_mode),
4740                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4741                 t3 = gen_reg_rtx (compute_mode);
4742                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4743                                       compute_mode, 1, 1);
4744                 if (t3 == 0)
4745                   {
4746                     rtx_code_label *lab;
4747                     lab = gen_label_rtx ();
4748                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4749                     expand_inc (t1, const1_rtx);
4750                     emit_label (lab);
4751                     quotient = t1;
4752                   }
4753                 else
4754                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4755                                                           t1, t3),
4756                                             tquotient);
4757                 break;
4758               }
4759
4760             /* Try using an instruction that produces both the quotient and
4761                remainder, using truncation.  We can easily compensate the
4762                quotient or remainder to get ceiling rounding, once we have the
4763                remainder.  Notice that we compute also the final remainder
4764                value here, and return the result right away.  */
4765             if (target == 0 || GET_MODE (target) != compute_mode)
4766               target = gen_reg_rtx (compute_mode);
4767
4768             if (rem_flag)
4769               {
4770                 remainder = (REG_P (target)
4771                              ? target : gen_reg_rtx (compute_mode));
4772                 quotient = gen_reg_rtx (compute_mode);
4773               }
4774             else
4775               {
4776                 quotient = (REG_P (target)
4777                             ? target : gen_reg_rtx (compute_mode));
4778                 remainder = gen_reg_rtx (compute_mode);
4779               }
4780
4781             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4782                                      remainder, 1))
4783               {
4784                 /* This could be computed with a branch-less sequence.
4785                    Save that for later.  */
4786                 rtx_code_label *label = gen_label_rtx ();
4787                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4788                                  compute_mode, label);
4789                 expand_inc (quotient, const1_rtx);
4790                 expand_dec (remainder, op1);
4791                 emit_label (label);
4792                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4793               }
4794
4795             /* No luck with division elimination or divmod.  Have to do it
4796                by conditionally adjusting op0 *and* the result.  */
4797             {
4798               rtx_code_label *label1, *label2;
4799               rtx adjusted_op0, tem;
4800
4801               quotient = gen_reg_rtx (compute_mode);
4802               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4803               label1 = gen_label_rtx ();
4804               label2 = gen_label_rtx ();
4805               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4806                                compute_mode, label1);
4807               emit_move_insn  (quotient, const0_rtx);
4808               emit_jump_insn (targetm.gen_jump (label2));
4809               emit_barrier ();
4810               emit_label (label1);
4811               expand_dec (adjusted_op0, const1_rtx);
4812               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4813                                   quotient, 1, OPTAB_LIB_WIDEN);
4814               if (tem != quotient)
4815                 emit_move_insn (quotient, tem);
4816               expand_inc (quotient, const1_rtx);
4817               emit_label (label2);
4818             }
4819           }
4820         else /* signed */
4821           {
4822             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4823                 && INTVAL (op1) >= 0)
4824               {
4825                 /* This is extremely similar to the code for the unsigned case
4826                    above.  For 2.7 we should merge these variants, but for
4827                    2.6.1 I don't want to touch the code for unsigned since that
4828                    get used in C.  The signed case will only be used by other
4829                    languages (Ada).  */
4830
4831                 rtx t1, t2, t3;
4832                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4833                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4834                                    floor_log2 (d), tquotient, 0);
4835                 t2 = expand_binop (compute_mode, and_optab, op0,
4836                                    gen_int_mode (d - 1, compute_mode),
4837                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4838                 t3 = gen_reg_rtx (compute_mode);
4839                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4840                                       compute_mode, 1, 1);
4841                 if (t3 == 0)
4842                   {
4843                     rtx_code_label *lab;
4844                     lab = gen_label_rtx ();
4845                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4846                     expand_inc (t1, const1_rtx);
4847                     emit_label (lab);
4848                     quotient = t1;
4849                   }
4850                 else
4851                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4852                                                           t1, t3),
4853                                             tquotient);
4854                 break;
4855               }
4856
4857             /* Try using an instruction that produces both the quotient and
4858                remainder, using truncation.  We can easily compensate the
4859                quotient or remainder to get ceiling rounding, once we have the
4860                remainder.  Notice that we compute also the final remainder
4861                value here, and return the result right away.  */
4862             if (target == 0 || GET_MODE (target) != compute_mode)
4863               target = gen_reg_rtx (compute_mode);
4864             if (rem_flag)
4865               {
4866                 remainder= (REG_P (target)
4867                             ? target : gen_reg_rtx (compute_mode));
4868                 quotient = gen_reg_rtx (compute_mode);
4869               }
4870             else
4871               {
4872                 quotient = (REG_P (target)
4873                             ? target : gen_reg_rtx (compute_mode));
4874                 remainder = gen_reg_rtx (compute_mode);
4875               }
4876
4877             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4878                                      remainder, 0))
4879               {
4880                 /* This could be computed with a branch-less sequence.
4881                    Save that for later.  */
4882                 rtx tem;
4883                 rtx_code_label *label = gen_label_rtx ();
4884                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4885                                  compute_mode, label);
4886                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4887                                     NULL_RTX, 0, OPTAB_WIDEN);
4888                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4889                 expand_inc (quotient, const1_rtx);
4890                 expand_dec (remainder, op1);
4891                 emit_label (label);
4892                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4893               }
4894
4895             /* No luck with division elimination or divmod.  Have to do it
4896                by conditionally adjusting op0 *and* the result.  */
4897             {
4898               rtx_code_label *label1, *label2, *label3, *label4, *label5;
4899               rtx adjusted_op0;
4900               rtx tem;
4901
4902               quotient = gen_reg_rtx (compute_mode);
4903               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4904               label1 = gen_label_rtx ();
4905               label2 = gen_label_rtx ();
4906               label3 = gen_label_rtx ();
4907               label4 = gen_label_rtx ();
4908               label5 = gen_label_rtx ();
4909               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4910               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4911                                compute_mode, label1);
4912               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4913                                   quotient, 0, OPTAB_LIB_WIDEN);
4914               if (tem != quotient)
4915                 emit_move_insn (quotient, tem);
4916               emit_jump_insn (targetm.gen_jump (label5));
4917               emit_barrier ();
4918               emit_label (label1);
4919               expand_dec (adjusted_op0, const1_rtx);
4920               emit_jump_insn (targetm.gen_jump (label4));
4921               emit_barrier ();
4922               emit_label (label2);
4923               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4924                                compute_mode, label3);
4925               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4926                                   quotient, 0, OPTAB_LIB_WIDEN);
4927               if (tem != quotient)
4928                 emit_move_insn (quotient, tem);
4929               emit_jump_insn (targetm.gen_jump (label5));
4930               emit_barrier ();
4931               emit_label (label3);
4932               expand_inc (adjusted_op0, const1_rtx);
4933               emit_label (label4);
4934               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4935                                   quotient, 0, OPTAB_LIB_WIDEN);
4936               if (tem != quotient)
4937                 emit_move_insn (quotient, tem);
4938               expand_inc (quotient, const1_rtx);
4939               emit_label (label5);
4940             }
4941           }
4942         break;
4943
4944       case EXACT_DIV_EXPR:
4945         if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4946           {
4947             HOST_WIDE_INT d = INTVAL (op1);
4948             unsigned HOST_WIDE_INT ml;
4949             int pre_shift;
4950             rtx t1;
4951
4952             pre_shift = ctz_or_zero (d);
4953             ml = invert_mod2n (d >> pre_shift, size);
4954             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4955                                pre_shift, NULL_RTX, unsignedp);
4956             quotient = expand_mult (compute_mode, t1,
4957                                     gen_int_mode (ml, compute_mode),
4958                                     NULL_RTX, 1);
4959
4960             insn = get_last_insn ();
4961             set_dst_reg_note (insn, REG_EQUAL,
4962                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4963                                               compute_mode, op0, op1),
4964                               quotient);
4965           }
4966         break;
4967
4968       case ROUND_DIV_EXPR:
4969       case ROUND_MOD_EXPR:
4970         if (unsignedp)
4971           {
4972             rtx tem;
4973             rtx_code_label *label;
4974             label = gen_label_rtx ();
4975             quotient = gen_reg_rtx (compute_mode);
4976             remainder = gen_reg_rtx (compute_mode);
4977             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4978               {
4979                 rtx tem;
4980                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4981                                          quotient, 1, OPTAB_LIB_WIDEN);
4982                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4983                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4984                                           remainder, 1, OPTAB_LIB_WIDEN);
4985               }
4986             tem = plus_constant (compute_mode, op1, -1);
4987             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4988             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4989             expand_inc (quotient, const1_rtx);
4990             expand_dec (remainder, op1);
4991             emit_label (label);
4992           }
4993         else
4994           {
4995             rtx abs_rem, abs_op1, tem, mask;
4996             rtx_code_label *label;
4997             label = gen_label_rtx ();
4998             quotient = gen_reg_rtx (compute_mode);
4999             remainder = gen_reg_rtx (compute_mode);
5000             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5001               {
5002                 rtx tem;
5003                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
5004                                          quotient, 0, OPTAB_LIB_WIDEN);
5005                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
5006                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
5007                                           remainder, 0, OPTAB_LIB_WIDEN);
5008               }
5009             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
5010             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
5011             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
5012                                 1, NULL_RTX, 1);
5013             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
5014             tem = expand_binop (compute_mode, xor_optab, op0, op1,
5015                                 NULL_RTX, 0, OPTAB_WIDEN);
5016             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
5017                                  size - 1, NULL_RTX, 0);
5018             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
5019                                 NULL_RTX, 0, OPTAB_WIDEN);
5020             tem = expand_binop (compute_mode, sub_optab, tem, mask,
5021                                 NULL_RTX, 0, OPTAB_WIDEN);
5022             expand_inc (quotient, tem);
5023             tem = expand_binop (compute_mode, xor_optab, mask, op1,
5024                                 NULL_RTX, 0, OPTAB_WIDEN);
5025             tem = expand_binop (compute_mode, sub_optab, tem, mask,
5026                                 NULL_RTX, 0, OPTAB_WIDEN);
5027             expand_dec (remainder, tem);
5028             emit_label (label);
5029           }
5030         return gen_lowpart (mode, rem_flag ? remainder : quotient);
5031
5032       default:
5033         gcc_unreachable ();
5034       }
5035
5036   if (quotient == 0)
5037     {
5038       if (target && GET_MODE (target) != compute_mode)
5039         target = 0;
5040
5041       if (rem_flag)
5042         {
5043           /* Try to produce the remainder without producing the quotient.
5044              If we seem to have a divmod pattern that does not require widening,
5045              don't try widening here.  We should really have a WIDEN argument
5046              to expand_twoval_binop, since what we'd really like to do here is
5047              1) try a mod insn in compute_mode
5048              2) try a divmod insn in compute_mode
5049              3) try a div insn in compute_mode and multiply-subtract to get
5050                 remainder
5051              4) try the same things with widening allowed.  */
5052           remainder
5053             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5054                                  op0, op1, target,
5055                                  unsignedp,
5056                                  ((optab_handler (optab2, compute_mode)
5057                                    != CODE_FOR_nothing)
5058                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
5059           if (remainder == 0)
5060             {
5061               /* No luck there.  Can we do remainder and divide at once
5062                  without a library call?  */
5063               remainder = gen_reg_rtx (compute_mode);
5064               if (! expand_twoval_binop ((unsignedp
5065                                           ? udivmod_optab
5066                                           : sdivmod_optab),
5067                                          op0, op1,
5068                                          NULL_RTX, remainder, unsignedp))
5069                 remainder = 0;
5070             }
5071
5072           if (remainder)
5073             return gen_lowpart (mode, remainder);
5074         }
5075
5076       /* Produce the quotient.  Try a quotient insn, but not a library call.
5077          If we have a divmod in this mode, use it in preference to widening
5078          the div (for this test we assume it will not fail). Note that optab2
5079          is set to the one of the two optabs that the call below will use.  */
5080       quotient
5081         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5082                              op0, op1, rem_flag ? NULL_RTX : target,
5083                              unsignedp,
5084                              ((optab_handler (optab2, compute_mode)
5085                                != CODE_FOR_nothing)
5086                               ? OPTAB_DIRECT : OPTAB_WIDEN));
5087
5088       if (quotient == 0)
5089         {
5090           /* No luck there.  Try a quotient-and-remainder insn,
5091              keeping the quotient alone.  */
5092           quotient = gen_reg_rtx (compute_mode);
5093           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5094                                      op0, op1,
5095                                      quotient, NULL_RTX, unsignedp))
5096             {
5097               quotient = 0;
5098               if (! rem_flag)
5099                 /* Still no luck.  If we are not computing the remainder,
5100                    use a library call for the quotient.  */
5101                 quotient = sign_expand_binop (compute_mode,
5102                                               udiv_optab, sdiv_optab,
5103                                               op0, op1, target,
5104                                               unsignedp, OPTAB_LIB_WIDEN);
5105             }
5106         }
5107     }
5108
5109   if (rem_flag)
5110     {
5111       if (target && GET_MODE (target) != compute_mode)
5112         target = 0;
5113
5114       if (quotient == 0)
5115         {
5116           /* No divide instruction either.  Use library for remainder.  */
5117           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5118                                          op0, op1, target,
5119                                          unsignedp, OPTAB_LIB_WIDEN);
5120           /* No remainder function.  Try a quotient-and-remainder
5121              function, keeping the remainder.  */
5122           if (!remainder)
5123             {
5124               remainder = gen_reg_rtx (compute_mode);
5125               if (!expand_twoval_binop_libfunc
5126                   (unsignedp ? udivmod_optab : sdivmod_optab,
5127                    op0, op1,
5128                    NULL_RTX, remainder,
5129                    unsignedp ? UMOD : MOD))
5130                 remainder = NULL_RTX;
5131             }
5132         }
5133       else
5134         {
5135           /* We divided.  Now finish doing X - Y * (X / Y).  */
5136           remainder = expand_mult (compute_mode, quotient, op1,
5137                                    NULL_RTX, unsignedp);
5138           remainder = expand_binop (compute_mode, sub_optab, op0,
5139                                     remainder, target, unsignedp,
5140                                     OPTAB_LIB_WIDEN);
5141         }
5142     }
5143
5144   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5145 }
5146 \f
5147 /* Return a tree node with data type TYPE, describing the value of X.
5148    Usually this is a VAR_DECL, if there is no obvious better choice.
5149    X may be an expression; however, we only support those expressions
5150    generated by loop.c.  */
5151
5152 tree
5153 make_tree (tree type, rtx x)
5154 {
5155   tree t;
5156
5157   switch (GET_CODE (x))
5158     {
5159     case CONST_INT:
5160     case CONST_WIDE_INT:
5161       t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5162       return t;
5163
5164     case CONST_DOUBLE:
5165       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5166       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5167         t = wide_int_to_tree (type,
5168                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5169                                                     HOST_BITS_PER_WIDE_INT * 2));
5170       else
5171         t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5172
5173       return t;
5174
5175     case CONST_VECTOR:
5176       {
5177         int units = CONST_VECTOR_NUNITS (x);
5178         tree itype = TREE_TYPE (type);
5179         tree *elts;
5180         int i;
5181
5182         /* Build a tree with vector elements.  */
5183         elts = XALLOCAVEC (tree, units);
5184         for (i = units - 1; i >= 0; --i)
5185           {
5186             rtx elt = CONST_VECTOR_ELT (x, i);
5187             elts[i] = make_tree (itype, elt);
5188           }
5189
5190         return build_vector (type, elts);
5191       }
5192
5193     case PLUS:
5194       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5195                           make_tree (type, XEXP (x, 1)));
5196
5197     case MINUS:
5198       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5199                           make_tree (type, XEXP (x, 1)));
5200
5201     case NEG:
5202       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5203
5204     case MULT:
5205       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5206                           make_tree (type, XEXP (x, 1)));
5207
5208     case ASHIFT:
5209       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5210                           make_tree (type, XEXP (x, 1)));
5211
5212     case LSHIFTRT:
5213       t = unsigned_type_for (type);
5214       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5215                                          make_tree (t, XEXP (x, 0)),
5216                                          make_tree (type, XEXP (x, 1))));
5217
5218     case ASHIFTRT:
5219       t = signed_type_for (type);
5220       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5221                                          make_tree (t, XEXP (x, 0)),
5222                                          make_tree (type, XEXP (x, 1))));
5223
5224     case DIV:
5225       if (TREE_CODE (type) != REAL_TYPE)
5226         t = signed_type_for (type);
5227       else
5228         t = type;
5229
5230       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5231                                          make_tree (t, XEXP (x, 0)),
5232                                          make_tree (t, XEXP (x, 1))));
5233     case UDIV:
5234       t = unsigned_type_for (type);
5235       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5236                                          make_tree (t, XEXP (x, 0)),
5237                                          make_tree (t, XEXP (x, 1))));
5238
5239     case SIGN_EXTEND:
5240     case ZERO_EXTEND:
5241       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5242                                           GET_CODE (x) == ZERO_EXTEND);
5243       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5244
5245     case CONST:
5246       return make_tree (type, XEXP (x, 0));
5247
5248     case SYMBOL_REF:
5249       t = SYMBOL_REF_DECL (x);
5250       if (t)
5251         return fold_convert (type, build_fold_addr_expr (t));
5252       /* fall through.  */
5253
5254     default:
5255       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5256
5257       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5258          address mode to pointer mode.  */
5259       if (POINTER_TYPE_P (type))
5260         x = convert_memory_address_addr_space
5261               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5262
5263       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5264          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5265       t->decl_with_rtl.rtl = x;
5266
5267       return t;
5268     }
5269 }
5270 \f
5271 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5272    and returning TARGET.
5273
5274    If TARGET is 0, a pseudo-register or constant is returned.  */
5275
5276 rtx
5277 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5278 {
5279   rtx tem = 0;
5280
5281   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5282     tem = simplify_binary_operation (AND, mode, op0, op1);
5283   if (tem == 0)
5284     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5285
5286   if (target == 0)
5287     target = tem;
5288   else if (tem != target)
5289     emit_move_insn (target, tem);
5290   return target;
5291 }
5292
5293 /* Helper function for emit_store_flag.  */
5294 rtx
5295 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5296              machine_mode mode, machine_mode compare_mode,
5297              int unsignedp, rtx x, rtx y, int normalizep,
5298              machine_mode target_mode)
5299 {
5300   struct expand_operand ops[4];
5301   rtx op0, comparison, subtarget;
5302   rtx_insn *last;
5303   machine_mode result_mode = targetm.cstore_mode (icode);
5304
5305   last = get_last_insn ();
5306   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5307   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5308   if (!x || !y)
5309     {
5310       delete_insns_since (last);
5311       return NULL_RTX;
5312     }
5313
5314   if (target_mode == VOIDmode)
5315     target_mode = result_mode;
5316   if (!target)
5317     target = gen_reg_rtx (target_mode);
5318
5319   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5320
5321   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5322   create_fixed_operand (&ops[1], comparison);
5323   create_fixed_operand (&ops[2], x);
5324   create_fixed_operand (&ops[3], y);
5325   if (!maybe_expand_insn (icode, 4, ops))
5326     {
5327       delete_insns_since (last);
5328       return NULL_RTX;
5329     }
5330   subtarget = ops[0].value;
5331
5332   /* If we are converting to a wider mode, first convert to
5333      TARGET_MODE, then normalize.  This produces better combining
5334      opportunities on machines that have a SIGN_EXTRACT when we are
5335      testing a single bit.  This mostly benefits the 68k.
5336
5337      If STORE_FLAG_VALUE does not have the sign bit set when
5338      interpreted in MODE, we can do this conversion as unsigned, which
5339      is usually more efficient.  */
5340   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5341     {
5342       convert_move (target, subtarget,
5343                     val_signbit_known_clear_p (result_mode,
5344                                                STORE_FLAG_VALUE));
5345       op0 = target;
5346       result_mode = target_mode;
5347     }
5348   else
5349     op0 = subtarget;
5350
5351   /* If we want to keep subexpressions around, don't reuse our last
5352      target.  */
5353   if (optimize)
5354     subtarget = 0;
5355
5356   /* Now normalize to the proper value in MODE.  Sometimes we don't
5357      have to do anything.  */
5358   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5359     ;
5360   /* STORE_FLAG_VALUE might be the most negative number, so write
5361      the comparison this way to avoid a compiler-time warning.  */
5362   else if (- normalizep == STORE_FLAG_VALUE)
5363     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5364
5365   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5366      it hard to use a value of just the sign bit due to ANSI integer
5367      constant typing rules.  */
5368   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5369     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5370                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5371                         normalizep == 1);
5372   else
5373     {
5374       gcc_assert (STORE_FLAG_VALUE & 1);
5375
5376       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5377       if (normalizep == -1)
5378         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5379     }
5380
5381   /* If we were converting to a smaller mode, do the conversion now.  */
5382   if (target_mode != result_mode)
5383     {
5384       convert_move (target, op0, 0);
5385       return target;
5386     }
5387   else
5388     return op0;
5389 }
5390
5391
5392 /* A subroutine of emit_store_flag only including "tricks" that do not
5393    need a recursive call.  These are kept separate to avoid infinite
5394    loops.  */
5395
5396 static rtx
5397 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5398                    machine_mode mode, int unsignedp, int normalizep,
5399                    machine_mode target_mode)
5400 {
5401   rtx subtarget;
5402   enum insn_code icode;
5403   machine_mode compare_mode;
5404   enum mode_class mclass;
5405   enum rtx_code scode;
5406
5407   if (unsignedp)
5408     code = unsigned_condition (code);
5409   scode = swap_condition (code);
5410
5411   /* If one operand is constant, make it the second one.  Only do this
5412      if the other operand is not constant as well.  */
5413
5414   if (swap_commutative_operands_p (op0, op1))
5415     {
5416       std::swap (op0, op1);
5417       code = swap_condition (code);
5418     }
5419
5420   if (mode == VOIDmode)
5421     mode = GET_MODE (op0);
5422
5423   /* For some comparisons with 1 and -1, we can convert this to
5424      comparisons with zero.  This will often produce more opportunities for
5425      store-flag insns.  */
5426
5427   switch (code)
5428     {
5429     case LT:
5430       if (op1 == const1_rtx)
5431         op1 = const0_rtx, code = LE;
5432       break;
5433     case LE:
5434       if (op1 == constm1_rtx)
5435         op1 = const0_rtx, code = LT;
5436       break;
5437     case GE:
5438       if (op1 == const1_rtx)
5439         op1 = const0_rtx, code = GT;
5440       break;
5441     case GT:
5442       if (op1 == constm1_rtx)
5443         op1 = const0_rtx, code = GE;
5444       break;
5445     case GEU:
5446       if (op1 == const1_rtx)
5447         op1 = const0_rtx, code = NE;
5448       break;
5449     case LTU:
5450       if (op1 == const1_rtx)
5451         op1 = const0_rtx, code = EQ;
5452       break;
5453     default:
5454       break;
5455     }
5456
5457   /* If we are comparing a double-word integer with zero or -1, we can
5458      convert the comparison into one involving a single word.  */
5459   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5460       && GET_MODE_CLASS (mode) == MODE_INT
5461       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5462     {
5463       rtx tem;
5464       if ((code == EQ || code == NE)
5465           && (op1 == const0_rtx || op1 == constm1_rtx))
5466         {
5467           rtx op00, op01;
5468
5469           /* Do a logical OR or AND of the two words and compare the
5470              result.  */
5471           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5472           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5473           tem = expand_binop (word_mode,
5474                               op1 == const0_rtx ? ior_optab : and_optab,
5475                               op00, op01, NULL_RTX, unsignedp,
5476                               OPTAB_DIRECT);
5477
5478           if (tem != 0)
5479             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5480                                    unsignedp, normalizep);
5481         }
5482       else if ((code == LT || code == GE) && op1 == const0_rtx)
5483         {
5484           rtx op0h;
5485
5486           /* If testing the sign bit, can just test on high word.  */
5487           op0h = simplify_gen_subreg (word_mode, op0, mode,
5488                                       subreg_highpart_offset (word_mode,
5489                                                               mode));
5490           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5491                                  unsignedp, normalizep);
5492         }
5493       else
5494         tem = NULL_RTX;
5495
5496       if (tem)
5497         {
5498           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5499             return tem;
5500           if (!target)
5501             target = gen_reg_rtx (target_mode);
5502
5503           convert_move (target, tem,
5504                         !val_signbit_known_set_p (word_mode,
5505                                                   (normalizep ? normalizep
5506                                                    : STORE_FLAG_VALUE)));
5507           return target;
5508         }
5509     }
5510
5511   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5512      complement of A (for GE) and shifting the sign bit to the low bit.  */
5513   if (op1 == const0_rtx && (code == LT || code == GE)
5514       && GET_MODE_CLASS (mode) == MODE_INT
5515       && (normalizep || STORE_FLAG_VALUE == 1
5516           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5517     {
5518       subtarget = target;
5519
5520       if (!target)
5521         target_mode = mode;
5522
5523       /* If the result is to be wider than OP0, it is best to convert it
5524          first.  If it is to be narrower, it is *incorrect* to convert it
5525          first.  */
5526       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5527         {
5528           op0 = convert_modes (target_mode, mode, op0, 0);
5529           mode = target_mode;
5530         }
5531
5532       if (target_mode != mode)
5533         subtarget = 0;
5534
5535       if (code == GE)
5536         op0 = expand_unop (mode, one_cmpl_optab, op0,
5537                            ((STORE_FLAG_VALUE == 1 || normalizep)
5538                             ? 0 : subtarget), 0);
5539
5540       if (STORE_FLAG_VALUE == 1 || normalizep)
5541         /* If we are supposed to produce a 0/1 value, we want to do
5542            a logical shift from the sign bit to the low-order bit; for
5543            a -1/0 value, we do an arithmetic shift.  */
5544         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5545                             GET_MODE_BITSIZE (mode) - 1,
5546                             subtarget, normalizep != -1);
5547
5548       if (mode != target_mode)
5549         op0 = convert_modes (target_mode, mode, op0, 0);
5550
5551       return op0;
5552     }
5553
5554   mclass = GET_MODE_CLASS (mode);
5555   FOR_EACH_MODE_FROM (compare_mode, mode)
5556     {
5557      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5558      icode = optab_handler (cstore_optab, optab_mode);
5559      if (icode != CODE_FOR_nothing)
5560         {
5561           do_pending_stack_adjust ();
5562           rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5563                                  unsignedp, op0, op1, normalizep, target_mode);
5564           if (tem)
5565             return tem;
5566
5567           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5568             {
5569               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5570                                  unsignedp, op1, op0, normalizep, target_mode);
5571               if (tem)
5572                 return tem;
5573             }
5574           break;
5575         }
5576     }
5577
5578   return 0;
5579 }
5580
5581 /* Subroutine of emit_store_flag that handles cases in which the operands
5582    are scalar integers.  SUBTARGET is the target to use for temporary
5583    operations and TRUEVAL is the value to store when the condition is
5584    true.  All other arguments are as for emit_store_flag.  */
5585
5586 rtx
5587 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5588                      rtx op1, machine_mode mode, int unsignedp,
5589                      int normalizep, rtx trueval)
5590 {
5591   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5592   rtx_insn *last = get_last_insn ();
5593   rtx tem;
5594
5595   /* If this is an equality comparison of integers, we can try to exclusive-or
5596      (or subtract) the two operands and use a recursive call to try the
5597      comparison with zero.  Don't do any of these cases if branches are
5598      very cheap.  */
5599
5600   if ((code == EQ || code == NE) && op1 != const0_rtx)
5601     {
5602       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5603                           OPTAB_WIDEN);
5604
5605       if (tem == 0)
5606         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5607                             OPTAB_WIDEN);
5608       if (tem != 0)
5609         tem = emit_store_flag (target, code, tem, const0_rtx,
5610                                mode, unsignedp, normalizep);
5611       if (tem != 0)
5612         return tem;
5613
5614       delete_insns_since (last);
5615     }
5616
5617   /* For integer comparisons, try the reverse comparison.  However, for
5618      small X and if we'd have anyway to extend, implementing "X != 0"
5619      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5620   rtx_code rcode = reverse_condition (code);
5621   if (can_compare_p (rcode, mode, ccp_store_flag)
5622       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5623             && code == NE
5624             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5625             && op1 == const0_rtx))
5626     {
5627       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5628                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5629
5630       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5631       if (want_add
5632           && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5633                        optimize_insn_for_speed_p ()) == 0)
5634         {
5635           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5636                                    STORE_FLAG_VALUE, target_mode);
5637           if (tem != 0)
5638             tem = expand_binop (target_mode, add_optab, tem,
5639                                 gen_int_mode (normalizep, target_mode),
5640                                 target, 0, OPTAB_WIDEN);
5641         }
5642       else if (!want_add
5643                && rtx_cost (trueval, mode, XOR, 1,
5644                             optimize_insn_for_speed_p ()) == 0)
5645         {
5646           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5647                                    normalizep, target_mode);
5648           if (tem != 0)
5649             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5650                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5651         }
5652
5653       if (tem != 0)
5654         return tem;
5655       delete_insns_since (last);
5656     }
5657
5658   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5659      the constant zero.  Reject all other comparisons at this point.  Only
5660      do LE and GT if branches are expensive since they are expensive on
5661      2-operand machines.  */
5662
5663   if (op1 != const0_rtx
5664       || (code != EQ && code != NE
5665           && (BRANCH_COST (optimize_insn_for_speed_p (),
5666                            false) <= 1 || (code != LE && code != GT))))
5667     return 0;
5668
5669   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5670      do the necessary operation below.  */
5671
5672   tem = 0;
5673
5674   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5675      the sign bit set.  */
5676
5677   if (code == LE)
5678     {
5679       /* This is destructive, so SUBTARGET can't be OP0.  */
5680       if (rtx_equal_p (subtarget, op0))
5681         subtarget = 0;
5682
5683       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5684                           OPTAB_WIDEN);
5685       if (tem)
5686         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5687                             OPTAB_WIDEN);
5688     }
5689
5690   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5691      number of bits in the mode of OP0, minus one.  */
5692
5693   if (code == GT)
5694     {
5695       if (rtx_equal_p (subtarget, op0))
5696         subtarget = 0;
5697
5698       tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5699                                 GET_MODE_BITSIZE (mode) - 1,
5700                                 subtarget, 0);
5701       if (tem)
5702         tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5703                             OPTAB_WIDEN);
5704     }
5705
5706   if (code == EQ || code == NE)
5707     {
5708       /* For EQ or NE, one way to do the comparison is to apply an operation
5709          that converts the operand into a positive number if it is nonzero
5710          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5711          for NE we negate.  This puts the result in the sign bit.  Then we
5712          normalize with a shift, if needed.
5713
5714          Two operations that can do the above actions are ABS and FFS, so try
5715          them.  If that doesn't work, and MODE is smaller than a full word,
5716          we can use zero-extension to the wider mode (an unsigned conversion)
5717          as the operation.  */
5718
5719       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5720          that is compensated by the subsequent overflow when subtracting
5721          one / negating.  */
5722
5723       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5724         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5725       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5726         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5727       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5728         {
5729           tem = convert_modes (word_mode, mode, op0, 1);
5730           mode = word_mode;
5731         }
5732
5733       if (tem != 0)
5734         {
5735           if (code == EQ)
5736             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5737                                 0, OPTAB_WIDEN);
5738           else
5739             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5740         }
5741
5742       /* If we couldn't do it that way, for NE we can "or" the two's complement
5743          of the value with itself.  For EQ, we take the one's complement of
5744          that "or", which is an extra insn, so we only handle EQ if branches
5745          are expensive.  */
5746
5747       if (tem == 0
5748           && (code == NE
5749               || BRANCH_COST (optimize_insn_for_speed_p (),
5750                               false) > 1))
5751         {
5752           if (rtx_equal_p (subtarget, op0))
5753             subtarget = 0;
5754
5755           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5756           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5757                               OPTAB_WIDEN);
5758
5759           if (tem && code == EQ)
5760             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5761         }
5762     }
5763
5764   if (tem && normalizep)
5765     tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5766                               GET_MODE_BITSIZE (mode) - 1,
5767                               subtarget, normalizep == 1);
5768
5769   if (tem)
5770     {
5771       if (!target)
5772         ;
5773       else if (GET_MODE (tem) != target_mode)
5774         {
5775           convert_move (target, tem, 0);
5776           tem = target;
5777         }
5778       else if (!subtarget)
5779         {
5780           emit_move_insn (target, tem);
5781           tem = target;
5782         }
5783     }
5784   else
5785     delete_insns_since (last);
5786
5787   return tem;
5788 }
5789
5790 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5791    and storing in TARGET.  Normally return TARGET.
5792    Return 0 if that cannot be done.
5793
5794    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5795    it is VOIDmode, they cannot both be CONST_INT.
5796
5797    UNSIGNEDP is for the case where we have to widen the operands
5798    to perform the operation.  It says to use zero-extension.
5799
5800    NORMALIZEP is 1 if we should convert the result to be either zero
5801    or one.  Normalize is -1 if we should convert the result to be
5802    either zero or -1.  If NORMALIZEP is zero, the result will be left
5803    "raw" out of the scc insn.  */
5804
5805 rtx
5806 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5807                  machine_mode mode, int unsignedp, int normalizep)
5808 {
5809   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5810   enum rtx_code rcode;
5811   rtx subtarget;
5812   rtx tem, trueval;
5813   rtx_insn *last;
5814
5815   /* If we compare constants, we shouldn't use a store-flag operation,
5816      but a constant load.  We can get there via the vanilla route that
5817      usually generates a compare-branch sequence, but will in this case
5818      fold the comparison to a constant, and thus elide the branch.  */
5819   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5820     return NULL_RTX;
5821
5822   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5823                            target_mode);
5824   if (tem)
5825     return tem;
5826
5827   /* If we reached here, we can't do this with a scc insn, however there
5828      are some comparisons that can be done in other ways.  Don't do any
5829      of these cases if branches are very cheap.  */
5830   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5831     return 0;
5832
5833   /* See what we need to return.  We can only return a 1, -1, or the
5834      sign bit.  */
5835
5836   if (normalizep == 0)
5837     {
5838       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5839         normalizep = STORE_FLAG_VALUE;
5840
5841       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5842         ;
5843       else
5844         return 0;
5845     }
5846
5847   last = get_last_insn ();
5848
5849   /* If optimizing, use different pseudo registers for each insn, instead
5850      of reusing the same pseudo.  This leads to better CSE, but slows
5851      down the compiler, since there are more pseudos.  */
5852   subtarget = (!optimize
5853                && (target_mode == mode)) ? target : NULL_RTX;
5854   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5855
5856   /* For floating-point comparisons, try the reverse comparison or try
5857      changing the "orderedness" of the comparison.  */
5858   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5859     {
5860       enum rtx_code first_code;
5861       bool and_them;
5862
5863       rcode = reverse_condition_maybe_unordered (code);
5864       if (can_compare_p (rcode, mode, ccp_store_flag)
5865           && (code == ORDERED || code == UNORDERED
5866               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5867               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5868         {
5869           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5870                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5871
5872           /* For the reverse comparison, use either an addition or a XOR.  */
5873           if (want_add
5874               && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5875                            optimize_insn_for_speed_p ()) == 0)
5876             {
5877               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5878                                        STORE_FLAG_VALUE, target_mode);
5879               if (tem)
5880                 return expand_binop (target_mode, add_optab, tem,
5881                                      gen_int_mode (normalizep, target_mode),
5882                                      target, 0, OPTAB_WIDEN);
5883             }
5884           else if (!want_add
5885                    && rtx_cost (trueval, mode, XOR, 1,
5886                                 optimize_insn_for_speed_p ()) == 0)
5887             {
5888               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5889                                        normalizep, target_mode);
5890               if (tem)
5891                 return expand_binop (target_mode, xor_optab, tem, trueval,
5892                                      target, INTVAL (trueval) >= 0,
5893                                      OPTAB_WIDEN);
5894             }
5895         }
5896
5897       delete_insns_since (last);
5898
5899       /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
5900       if (code == ORDERED || code == UNORDERED)
5901         return 0;
5902
5903       and_them = split_comparison (code, mode, &first_code, &code);
5904
5905       /* If there are no NaNs, the first comparison should always fall through.
5906          Effectively change the comparison to the other one.  */
5907       if (!HONOR_NANS (mode))
5908         {
5909           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5910           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5911                                     target_mode);
5912         }
5913
5914       if (!HAVE_conditional_move)
5915         return 0;
5916
5917       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5918          conditional move.  */
5919       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5920                                normalizep, target_mode);
5921       if (tem == 0)
5922         return 0;
5923
5924       if (and_them)
5925         tem = emit_conditional_move (target, code, op0, op1, mode,
5926                                      tem, const0_rtx, GET_MODE (tem), 0);
5927       else
5928         tem = emit_conditional_move (target, code, op0, op1, mode,
5929                                      trueval, tem, GET_MODE (tem), 0);
5930
5931       if (tem == 0)
5932         delete_insns_since (last);
5933       return tem;
5934     }
5935
5936   /* The remaining tricks only apply to integer comparisons.  */
5937
5938   if (GET_MODE_CLASS (mode) == MODE_INT)
5939     return emit_store_flag_int (target, subtarget, code, op0, op1, mode,
5940                                 unsignedp, normalizep, trueval);
5941
5942   return 0;
5943 }
5944
5945 /* Like emit_store_flag, but always succeeds.  */
5946
5947 rtx
5948 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5949                        machine_mode mode, int unsignedp, int normalizep)
5950 {
5951   rtx tem;
5952   rtx_code_label *label;
5953   rtx trueval, falseval;
5954
5955   /* First see if emit_store_flag can do the job.  */
5956   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5957   if (tem != 0)
5958     return tem;
5959
5960   if (!target)
5961     target = gen_reg_rtx (word_mode);
5962
5963   /* If this failed, we have to do this with set/compare/jump/set code.
5964      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5965   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5966   if (code == NE
5967       && GET_MODE_CLASS (mode) == MODE_INT
5968       && REG_P (target)
5969       && op0 == target
5970       && op1 == const0_rtx)
5971     {
5972       label = gen_label_rtx ();
5973       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5974                                NULL_RTX, NULL, label,
5975                                profile_probability::uninitialized ());
5976       emit_move_insn (target, trueval);
5977       emit_label (label);
5978       return target;
5979     }
5980
5981   if (!REG_P (target)
5982       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5983     target = gen_reg_rtx (GET_MODE (target));
5984
5985   /* Jump in the right direction if the target cannot implement CODE
5986      but can jump on its reverse condition.  */
5987   falseval = const0_rtx;
5988   if (! can_compare_p (code, mode, ccp_jump)
5989       && (! FLOAT_MODE_P (mode)
5990           || code == ORDERED || code == UNORDERED
5991           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5992           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5993     {
5994       enum rtx_code rcode;
5995       if (FLOAT_MODE_P (mode))
5996         rcode = reverse_condition_maybe_unordered (code);
5997       else
5998         rcode = reverse_condition (code);
5999
6000       /* Canonicalize to UNORDERED for the libcall.  */
6001       if (can_compare_p (rcode, mode, ccp_jump)
6002           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6003         {
6004           falseval = trueval;
6005           trueval = const0_rtx;
6006           code = rcode;
6007         }
6008     }
6009
6010   emit_move_insn (target, trueval);
6011   label = gen_label_rtx ();
6012   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6013                            label, profile_probability::uninitialized ());
6014
6015   emit_move_insn (target, falseval);
6016   emit_label (label);
6017
6018   return target;
6019 }
6020 \f
6021 /* Perform possibly multi-word comparison and conditional jump to LABEL
6022    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
6023    now a thin wrapper around do_compare_rtx_and_jump.  */
6024
6025 static void
6026 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6027                  rtx_code_label *label)
6028 {
6029   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6030   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6031                            NULL, label, profile_probability::uninitialized ());
6032 }