gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2017 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "backend.h"
  26 #include "target.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "predict.h"
  30 #include "memmodel.h"
  31 #include "tm_p.h"
  32 #include "expmed.h"
  33 #include "optabs.h"
  34 #include "emit-rtl.h"
  35 #include "diagnostic-core.h"
  36 #include "fold-const.h"
  37 #include "stor-layout.h"
  38 #include "dojump.h"
  39 #include "explow.h"
  40 #include "expr.h"
  41 #include "langhooks.h"
   42
      /* Default instance of the expmed target state.  When SWITCHABLE_TARGET
         is nonzero, this_target_expmed is additionally defined as a pointer
         that initially refers to that default instance.  */
   43 struct target_expmed default_target_expmed;
   44 #if SWITCHABLE_TARGET
   45 struct target_expmed *this_target_expmed = &default_target_expmed;
   46 #endif
   47
      /* Forward declarations of file-local helpers.  None of them is defined
         in the part of the file preceding this point; presumably the
         definitions appear further down -- confirm when editing.  */
   48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
   49                                    unsigned HOST_WIDE_INT,
   50                                    unsigned HOST_WIDE_INT,
   51                                    unsigned HOST_WIDE_INT,
   52                                    rtx, bool);
   53 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
   54                                      unsigned HOST_WIDE_INT,
   55                                      rtx, bool);
   56 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
   57                                    unsigned HOST_WIDE_INT,
   58                                    unsigned HOST_WIDE_INT,
   59                                    unsigned HOST_WIDE_INT,
   60                                    rtx, bool);
   61 static rtx extract_fixed_bit_field (machine_mode, rtx,
   62                                     unsigned HOST_WIDE_INT,
   63                                     unsigned HOST_WIDE_INT, rtx, int, bool);
   64 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
   65                                       unsigned HOST_WIDE_INT,
   66                                       unsigned HOST_WIDE_INT, rtx, int, bool);
   67 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
   68 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
   69                                     unsigned HOST_WIDE_INT, int, bool);
   70 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
   71 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
   72 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  73
   74 /* Return a constant integer mask value of mode MODE with BITSIZE ones
   75    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
   76    The mask is truncated if necessary to the width of mode MODE.  The
   77    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.
         
         The mask is computed by wi::shifted_mask at a precision of
         GET_MODE_PRECISION (MODE) bits and then turned into an rtx constant
         of mode MODE by immed_wide_int_const.  */
   78
   79 static inline rtx
   80 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
   81 {
   82   return immed_wide_int_const
   83     (wi::shifted_mask (bitpos, bitsize, complement,
   84                        GET_MODE_PRECISION (mode)), mode);
   85 }
  86
   87 /* Test whether a value is zero or a power of two.  X & (X - 1) clears
         the lowest set bit of X, so the result is zero exactly when X has at
         most one bit set.  Note that the argument X is evaluated twice.  */
   88 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
   89   (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
   90
      /* Scratch rtx expressions used while filling in the cost tables.
         init_expmed builds each expression once (initially in QImode);
         init_expmed_one_mode and init_expmed_one_conv then overwrite the
         modes and some operands in place before asking set_src_cost for
         the cost of the expression.  The shapes noted on the fields below
         are the ones built by init_expmed.  */
   91 struct init_expmed_rtl
   92 {
   93   rtx reg;        /* Raw register operand shared by the expressions.  */
   94   rtx plus;       /* (plus reg reg)  */
   95   rtx neg;        /* (neg reg)  */
   96   rtx mult;       /* (mult reg reg)  */
   97   rtx sdiv;       /* (div reg reg)  */
   98   rtx udiv;       /* (udiv reg reg)  */
   99   rtx sdiv_32;    /* (div reg pow2[5]), i.e. signed division by 32.  */
  100   rtx smod_32;    /* (mod reg pow2[5]), i.e. signed remainder by 32.  */
  101   rtx wide_mult;  /* (mult zext zext)  */
  102   rtx wide_lshr;  /* (lshiftrt wide_mult <count>)  */
  103   rtx wide_trunc; /* (truncate wide_lshr)  */
  104   rtx shift;      /* (ashift reg <count>)  */
  105   rtx shift_mult; /* (mult reg <power of two>)  */
  106   rtx shift_add;  /* (plus shift_mult reg)  */
  107   rtx shift_sub0; /* (minus shift_mult reg)  */
  108   rtx shift_sub1; /* (minus reg shift_mult)  */
  109   rtx zext;       /* (zero_extend reg)  */
  110   rtx trunc;      /* (truncate reg)  */
  111
        /* For 1 <= M < MAX_BITS_PER_WORD, init_expmed sets pow2[M] to the
           constant 1 << M and cint[M] to the constant M.  Index 0 is not
           assigned there and stays as zeroed by the memset in init_expmed.  */
  112   rtx pow2[MAX_BITS_PER_WORD];
  113   rtx cint[MAX_BITS_PER_WORD];
  114 };
  115
      /* Record, for the SPEED setting, the cost of converting a value of mode
         FROM_MODE to mode TO_MODE.  ALL supplies the scratch expressions; the
         TRUNCATE one is costed when TO_MODE is narrower than FROM_MODE (after
         the partial-int adjustment below), otherwise the ZERO_EXTEND one.

         Side effect: the mode of ALL->reg is set to FROM_MODE and left that
         way on return.  The modes of ALL->trunc and ALL->zext are not touched
         here; the caller in this file sets them before calling.  */
  116 static void
  117 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
  118                       machine_mode from_mode, bool speed)
  119 {
  120   int to_size, from_size;
  121   rtx which;
  122
  123   to_size = GET_MODE_PRECISION (to_mode);
  124   from_size = GET_MODE_PRECISION (from_mode);
  125
  126   /* Most partial integers have a precision less than the "full"
  127      integer it requires for storage.  In case one doesn't, for
  128      comparison purposes here, reduce the bit size by one in that
  129      case.  */
  130   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
  131       && pow2p_hwi (to_size))
  132     to_size --;
  133   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
  134       && pow2p_hwi (from_size))
  135     from_size --;
  136
  137   /* Assume cost of zero-extend and sign-extend is the same.  */
  138   which = (to_size < from_size ? all->trunc : all->zext);
  139
        /* The operand of the chosen expression is ALL->reg, so give it the
           source mode before costing the conversion.  */
  140   PUT_MODE (all->reg, from_mode);
  141   set_convert_cost (to_mode, from_mode, speed,
  142                     set_src_cost (which, to_mode, speed));
  143 }
  144
      /* Fill in the cost tables for mode MODE under the SPEED setting, using
         the scratch expressions in ALL.  The expressions are retargeted to
         MODE in place and then costed with set_src_cost.  The following are
         recorded: add, neg, mul, sdiv and udiv costs; whether signed division
         and remainder by a power of two count as cheap; shift, shift-add and
         shift-sub costs for each shift count below
         MIN (MAX_BITS_PER_WORD, unit bitsize of MODE); conversion costs from
         every MODE_INT mode when MODE is a scalar integer mode; and widening
         and highpart multiply costs when MODE is a MODE_INT mode with a wider
         mode.  */
  145 static void
  146 init_expmed_one_mode (struct init_expmed_rtl *all,
  147                       machine_mode mode, int speed)
  148 {
  149   int m, n, mode_bitsize;
  150   machine_mode mode_from;
  151
  152   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
  153
        /* Retarget the scratch expressions to MODE.  wide_mult and wide_lshr
           are deliberately not touched here; they get the wider mode at the
           end of this function.  */
  154   PUT_MODE (all->reg, mode);
  155   PUT_MODE (all->plus, mode);
  156   PUT_MODE (all->neg, mode);
  157   PUT_MODE (all->mult, mode);
  158   PUT_MODE (all->sdiv, mode);
  159   PUT_MODE (all->udiv, mode);
  160   PUT_MODE (all->sdiv_32, mode);
  161   PUT_MODE (all->smod_32, mode);
  162   PUT_MODE (all->wide_trunc, mode);
  163   PUT_MODE (all->shift, mode);
  164   PUT_MODE (all->shift_mult, mode);
  165   PUT_MODE (all->shift_add, mode);
  166   PUT_MODE (all->shift_sub0, mode);
  167   PUT_MODE (all->shift_sub1, mode);
  168   PUT_MODE (all->zext, mode);
  169   PUT_MODE (all->trunc, mode);
  170
  171   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
  172   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
  173   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
  174   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
  175   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
  176
        /* Division by 32 counts as cheap if it costs at most two additions,
           remainder by 32 if it costs at most four.  */
  177   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
  178                                      <= 2 * add_cost (speed, mode)));
  179   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
  180                                      <= 4 * add_cost (speed, mode)));
  181
        /* A shift by zero is free, and shift-add/shift-sub with a zero shift
           count cost the same as a plain addition.  */
  182   set_shift_cost (speed, mode, 0, 0);
  183   {
  184     int cost = add_cost (speed, mode);
  185     set_shiftadd_cost (speed, mode, 0, cost);
  186     set_shiftsub0_cost (speed, mode, 0, cost);
  187     set_shiftsub1_cost (speed, mode, 0, cost);
  188   }
  189
  190   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
  191   for (m = 1; m < n; m++)
  192     {
            /* Plug the shift count M into the shift expression and the
               multiplier 1 << M into shift_mult, which is an operand of
               shift_add, shift_sub0 and shift_sub1.  */
  193       XEXP (all->shift, 1) = all->cint[m];
  194       XEXP (all->shift_mult, 1) = all->pow2[m];
  195
  196       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
  197       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
  198                                                        speed));
  199       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
  200                                                         speed));
  201       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
  202                                                         speed));
  203     }
  204
  205   if (SCALAR_INT_MODE_P (mode))
  206     {
            /* Conversion costs into MODE from every MODE_INT mode.  Each call
               changes the mode of ALL->reg to the source mode.  */
  207       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
  208            mode_from = (machine_mode)(mode_from + 1))
  209         init_expmed_one_conv (all, mode, mode_from, speed);
  210     }
  211   if (GET_MODE_CLASS (mode) == MODE_INT)
  212     {
  213       machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
  214       if (wider_mode != VOIDmode)
  215         {
                /* NOTE(review): ALL->reg is not reset to MODE here, so it
                   appears to still carry the last source mode set by
                   init_expmed_one_conv above -- confirm this is intended.  */
  216           PUT_MODE (all->zext, wider_mode);
  217           PUT_MODE (all->wide_mult, wider_mode);
  218           PUT_MODE (all->wide_lshr, wider_mode);
                /* The highpart is the wide product shifted right by the
                   unit bitsize of MODE, then truncated to MODE.  */
  219           XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
  220
  221           set_mul_widen_cost (speed, wider_mode,
  222                               set_src_cost (all->wide_mult, wider_mode, speed));
  223           set_mul_highpart_cost (speed, mode,
  224                                  set_src_cost (all->wide_trunc, mode, speed));
  225         }
  226     }
  227 }
  228
      /* Initialize the cost tables of this file.  Builds a set of scratch
         rtx expressions, then for each of the two SPEED settings (0 and 1)
         records the cost of zero and calls init_expmed_one_mode for every
         MODE_INT mode, every MODE_PARTIAL_INT mode (if any) and every
         MODE_VECTOR_INT mode (if any).  Afterwards the alg hash entries are
         cleared if the hash was already in use (otherwise it is marked as in
         use), the default rtl profile is restored, and the scratch
         expressions are released with ggc_free.  */
  229 void
  230 init_expmed (void)
  231 {
  232   struct init_expmed_rtl all;
  233   machine_mode mode = QImode;
  234   int m, speed;
  235
  236   memset (&all, 0, sizeof all);
        /* Constants M and 1 << M for each M from 1 to MAX_BITS_PER_WORD - 1;
           index 0 stays zeroed.  */
  237   for (m = 1; m < MAX_BITS_PER_WORD; m++)
  238     {
  239       all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
  240       all.cint[m] = GEN_INT (m);
  241     }
  242
  243   /* Avoid using hard regs in ways which may be unsupported.  */
  244   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
  245   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
  246   all.neg = gen_rtx_NEG (mode, all.reg);
  247   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
  248   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
  249   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
  250   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
  251   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
  252   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
  253   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
        /* The second operands of wide_lshr, shift and shift_mult are
           placeholders here; init_expmed_one_mode replaces them with
           constants before costing.  */
  254   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
  255   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
  256   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
  257   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
  258   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
  259   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
  260   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
  261   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
  262
  263   for (speed = 0; speed < 2; speed++)
  264     {
  265       crtl->maybe_hot_insn_p = speed;
            /* NOTE(review): MODE is QImode on the first pass; on the second
               pass it holds whatever value the loops below left in it.  */
  266       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
  267
  268       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
  269            mode = (machine_mode)(mode + 1))
  270         init_expmed_one_mode (&all, mode, speed);
  271
  272       if (MIN_MODE_PARTIAL_INT != VOIDmode)
  273         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
  274              mode = (machine_mode)(mode + 1))
  275           init_expmed_one_mode (&all, mode, speed);
  276
  277       if (MIN_MODE_VECTOR_INT != VOIDmode)
  278         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
  279              mode = (machine_mode)(mode + 1))
  280           init_expmed_one_mode (&all, mode, speed);
  281     }
  282
        /* If the alg hash was already in use, zero all of its entries;
           otherwise just mark it as used from now on.  */
  283   if (alg_hash_used_p ())
  284     {
  285       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
  286       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
  287     }
  288   else
  289     set_alg_hash_used_p (true);
  290   default_rtl_profile ();
  291
        /* Release the scratch expressions in the reverse of the order in
           which they were created above.  */
  292   ggc_free (all.trunc);
  293   ggc_free (all.shift_sub1);
  294   ggc_free (all.shift_sub0);
  295   ggc_free (all.shift_add);
  296   ggc_free (all.shift_mult);
  297   ggc_free (all.shift);
  298   ggc_free (all.wide_trunc);
  299   ggc_free (all.wide_lshr);
  300   ggc_free (all.wide_mult);
  301   ggc_free (all.zext);
  302   ggc_free (all.smod_32);
  303   ggc_free (all.sdiv_32);
  304   ggc_free (all.udiv);
  305   ggc_free (all.sdiv);
  306   ggc_free (all.mult);
  307   ggc_free (all.neg);
  308   ggc_free (all.plus);
  309   ggc_free (all.reg);
  310 }
 311
  312 /* Return an rtx representing minus the value of X.
  313    MODE is the intended mode of the result,
  314    useful if X is a CONST_INT.
         
         The negation is first attempted with simplify_unary_operation; only
         if that yields no result is it expanded through neg_optab with
         expand_unop.  */
  315
  316 rtx
  317 negate_rtx (machine_mode mode, rtx x)
  318 {
  319   rtx result = simplify_unary_operation (NEG, mode, x, mode);
  320
  321   if (result == 0)
  322     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
  323
  324   return result;
  325 }
 326
  327 /* Whether reverse storage order is supported on the target.  Starts out
         as -1 (not checked yet); check_reverse_storage_order_support sets it
         to 0 (unsupported) or 1 (supported).  */
  328 static int reverse_storage_order_supported = -1;
  329
  330 /* Check whether reverse storage order is supported on the target and
         cache the answer in reverse_storage_order_supported.  It is treated
         as unsupported when BYTES_BIG_ENDIAN differs from WORDS_BIG_ENDIAN,
         in which case sorry is also called to report it.  */
  331
  332 static void
  333 check_reverse_storage_order_support (void)
  334 {
  335   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
  336     {
  337       reverse_storage_order_supported = 0;
  338       sorry ("reverse scalar storage order");
  339     }
  340   else
  341     reverse_storage_order_supported = 1;
  342 }
 343
  344 /* Whether reverse FP storage order is supported on the target.  Starts
         out as -1 (not checked yet);
         check_reverse_float_storage_order_support sets it to 0 (unsupported)
         or 1 (supported).  */
  345 static int reverse_float_storage_order_supported = -1;
  346
  347 /* Check whether reverse FP storage order is supported on the target and
         cache the answer in reverse_float_storage_order_supported.  It is
         treated as unsupported when FLOAT_WORDS_BIG_ENDIAN differs from
         WORDS_BIG_ENDIAN, in which case sorry is also called to report it.  */
  348
  349 static void
  350 check_reverse_float_storage_order_support (void)
  351 {
  352   if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
  353     {
  354       reverse_float_storage_order_supported = 0;
  355       sorry ("reverse floating-point scalar storage order");
  356     }
  357   else
  358     reverse_float_storage_order_supported = 1;
  359 }
 360
 361 /* Return an rtx representing value of X with reverse storage order.
 362    MODE is the intended mode of the result,
 363    useful if X is a CONST_INT.  */
 364
 365 rtx
 366 flip_storage_order (enum machine_mode mode, rtx x)
 367 {
 368   enum machine_mode int_mode;
 369   rtx result;
 370
 371   if (mode == QImode)
 372     return x;
 373
 374   if (COMPLEX_MODE_P (mode))
 375     {
 376       rtx real = read_complex_part (x, false);
 377       rtx imag = read_complex_part (x, true);
 378
 379       real = flip_storage_order (GET_MODE_INNER (mode), real);
 380       imag = flip_storage_order (GET_MODE_INNER (mode), imag);
 381
 382       return gen_rtx_CONCAT (mode, real, imag);
 383     }
 384
 385   if (__builtin_expect (reverse_storage_order_supported < 0, 0))
 386     check_reverse_storage_order_support ();
 387
 388   if (SCALAR_INT_MODE_P (mode))
 389     int_mode = mode;
 390   else
 391     {
 392       if (FLOAT_MODE_P (mode)
 393           && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
 394         check_reverse_float_storage_order_support ();
 395
 396       int_mode = mode_for_size (GET_MODE_PRECISION (mode), MODE_INT, 0);
 397       if (int_mode == BLKmode)
 398         {
 399           sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
 400           return x;
 401         }
 402       x = gen_lowpart (int_mode, x);
 403     }
 404
 405   result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
 406   if (result == 0)
 407     result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
 408
 409   if (int_mode != mode)
 410     result = gen_lowpart (mode, result);
 411
 412   return result;
 413 }
 414
  415 /* Adjust bitfield memory MEM so that it points to the first unit of mode
  416    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
  417    If MODE is BLKmode, return a reference to every byte in the bitfield.
  418    Set *NEW_BITNUM to the bit position of the field within the new memory.
         
         For BLKmode the new reference starts at the byte containing BITNUM
         and its size is the number of bytes needed to cover the field.  For
         any other MODE the reference starts at BITNUM rounded down to a
         multiple of GET_MODE_BITSIZE (MODE); BITSIZE is not used in that
         case.  */
  419
  420 static rtx
  421 narrow_bit_field_mem (rtx mem, machine_mode mode,
  422                       unsigned HOST_WIDE_INT bitsize,
  423                       unsigned HOST_WIDE_INT bitnum,
  424                       unsigned HOST_WIDE_INT *new_bitnum)
  425 {
  426   if (mode == BLKmode)
  427     {
  428       *new_bitnum = bitnum % BITS_PER_UNIT;
  429       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
            /* Bytes spanned by the field, rounding up.  */
  430       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
  431                             / BITS_PER_UNIT);
  432       return adjust_bitfield_address_size (mem, mode, offset, size);
  433     }
  434   else
  435     {
  436       unsigned int unit = GET_MODE_BITSIZE (mode);
  437       *new_bitnum = bitnum % unit;
            /* Byte offset of the MODE-sized unit that contains BITNUM.  */
  438       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
  439       return adjust_bitfield_address (mem, mode, offset);
  440     }
  441 }
 442
  443 /* The caller wants to perform insertion or extraction PATTERN on a
  444    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
  445    BITREGION_START and BITREGION_END are as for store_bit_field
  446    and FIELDMODE is the natural mode of the field.
  447
  448    Search for a mode that is compatible with the memory access
  449    restrictions and (where applicable) with a register insertion or
  450    extraction.  Return the new memory on success, storing the adjusted
  451    bit position in *NEW_BITNUM.  Return null otherwise.
         
         Candidate modes come from a bit_field_mode_iterator set up with the
         alignment and volatility of OP0.  The first mode it yields is always
         acceptable; unless the iterator prefers smaller modes, later modes
         are taken as long as they are no larger than the limit mode.  The
         search stops at the first mode that exceeds the limit.  */
  452
  453 static rtx
  454 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
  455                               rtx op0, HOST_WIDE_INT bitsize,
  456                               HOST_WIDE_INT bitnum,
  457                               unsigned HOST_WIDE_INT bitregion_start,
  458                               unsigned HOST_WIDE_INT bitregion_end,
  459                               machine_mode fieldmode,
  460                               unsigned HOST_WIDE_INT *new_bitnum)
  461 {
  462   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
  463                                 bitregion_end, MEM_ALIGN (op0),
  464                                 MEM_VOLATILE_P (op0));
  465   machine_mode best_mode;
  466   if (iter.next_mode (&best_mode))
  467     {
  468       /* We can use a memory in BEST_MODE.  See whether this is true for
  469          any wider modes.  All other things being equal, we prefer to
  470          use the widest mode possible because it tends to expose more
  471          CSE opportunities.  */
  472       if (!iter.prefer_smaller_modes ())
  473         {
  474           /* Limit the search to the mode required by the corresponding
  475              register insertion or extraction instruction, if any.  */
                /* The limit defaults to word_mode when no such instruction
                   is found.  */
  476           machine_mode limit_mode = word_mode;
  477           extraction_insn insn;
  478           if (get_best_reg_extraction_insn (&insn, pattern,
  479                                             GET_MODE_BITSIZE (best_mode),
  480                                             fieldmode))
  481             limit_mode = insn.field_mode;
  482
  483           machine_mode wider_mode;
  484           while (iter.next_mode (&wider_mode)
  485                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
  486             best_mode = wider_mode;
  487         }
  488       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
  489                                    new_bitnum);
  490     }
  491   return NULL_RTX;
  492 }
 493
  494 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
  495    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
  496    offset is then BITNUM / BITS_PER_UNIT.
         
         With BYTES_BIG_ENDIAN the field must start on a byte boundary and
         end either at the end of STRUCT_MODE or on a word boundary.
         Otherwise the only requirement is that BITNUM is a multiple of
         BITS_PER_WORD; BITSIZE and STRUCT_MODE are not examined in that
         case.  */
  497
  498 static bool
  499 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
  500                      unsigned HOST_WIDE_INT bitsize,
  501                      machine_mode struct_mode)
  502 {
  503   if (BYTES_BIG_ENDIAN)
  504     return (bitnum % BITS_PER_UNIT == 0
  505             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
  506                 || (bitnum + bitsize) % BITS_PER_WORD == 0));
  507   else
  508     return bitnum % BITS_PER_WORD == 0;
  509 }
 510
  511 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
  512    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
  513    Return false if the access would touch memory outside the range
  514    BITREGION_START to BITREGION_END for conformance to the C++ memory
  515    model.
         
         The bit-region check is only made when BITREGION_END is nonzero.
         All other conditions are tested in the order of the comments in the
         body; the first one that fails makes the function return false.  */
  516
  517 static bool
  518 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
  519                             unsigned HOST_WIDE_INT bitnum,
  520                             machine_mode fieldmode,
  521                             unsigned HOST_WIDE_INT bitregion_start,
  522                             unsigned HOST_WIDE_INT bitregion_end)
  523 {
  524   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
  525
  526   /* -fstrict-volatile-bitfields must be enabled and we must have a
  527      volatile MEM.  */
  528   if (!MEM_P (op0)
  529       || !MEM_VOLATILE_P (op0)
  530       || flag_strict_volatile_bitfields <= 0)
  531     return false;
  532
  533   /* Non-integral modes likely only happen with packed structures.
  534      Punt.  */
  535   if (!SCALAR_INT_MODE_P (fieldmode))
  536     return false;
  537
  538   /* The bit size must not be larger than the field mode, and
  539      the field mode must not be larger than a word.  */
  540   if (bitsize > modesize || modesize > BITS_PER_WORD)
  541     return false;
  542
  543   /* Check for cases of unaligned fields that must be split.  */
        /* That is, the field must not cross a MODESIZE-aligned boundary.  */
  544   if (bitnum % modesize + bitsize > modesize)
  545     return false;
  546
  547   /* The memory must be sufficiently aligned for a MODESIZE access.
  548      This condition guarantees, that the memory access will not
  549      touch anything after the end of the structure.  */
  550   if (MEM_ALIGN (op0) < modesize)
  551     return false;
  552
  553   /* Check for cases where the C++ memory model applies.  */
        /* The MODESIZE-aligned unit containing the field runs from
           BITNUM - BITNUM % MODESIZE for MODESIZE bits; it must lie entirely
           within the bit region.  */
  554   if (bitregion_end != 0
  555       && (bitnum - bitnum % modesize < bitregion_start
  556           || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
  557     return false;
  558
  559   return true;
  560 }
 561
  562 /* Return true if OP0 is a memory and if a bitfield of size BITSIZE at
  563    bit number BITNUM can be treated as a simple value of mode MODE.
         
         This requires the field to start on a byte boundary and to be exactly
         as wide as MODE.  In addition, either unaligned accesses in MODE at
         the alignment of OP0 must not be slow, or both BITNUM and the
         alignment of OP0 must satisfy the alignment of MODE.  */
  564
  565 static bool
  566 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
  567                        unsigned HOST_WIDE_INT bitnum, machine_mode mode)
  568 {
  569   return (MEM_P (op0)
  570           && bitnum % BITS_PER_UNIT == 0
  571           && bitsize == GET_MODE_BITSIZE (mode)
  572           && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
  573               || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
  574                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
  575 }
 576 \f
  577 /* Try to use instruction INSV to store VALUE into a field of OP0.
  578    BITSIZE and BITNUM are as for store_bit_field.
         
         Return true if the insertion instruction was emitted.  Return false
         if BITSIZE is zero or wider than the field mode of INSV, or if the
         instruction could not be expanded; in the latter case every insn
         emitted since entry is deleted again.  */
  579
  580 static bool
  581 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
  582                             unsigned HOST_WIDE_INT bitsize,
  583                             unsigned HOST_WIDE_INT bitnum,
  584                             rtx value)
  585 {
  586   struct expand_operand ops[4];
  587   rtx value1;
  588   rtx xop0 = op0;
        /* Remember the last insn so that a failed attempt can be undone.  */
  589   rtx_insn *last = get_last_insn ();
  590   bool copy_back = false;
  591
  592   machine_mode op_mode = insv->field_mode;
  593   unsigned int unit = GET_MODE_BITSIZE (op_mode);
  594   if (bitsize == 0 || bitsize > unit)
  595     return false;
  596
  597   if (MEM_P (xop0))
  598     /* Get a reference to the first byte of the field.  */
  599     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
  600                                  &bitnum);
  601   else
  602     {
  603       /* Convert from counting within OP0 to counting in OP_MODE.  */
  604       if (BYTES_BIG_ENDIAN)
  605         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
  606
  607       /* If xop0 is a register, we need it in OP_MODE
  608          to make it acceptable to the format of insv.  */
  609       if (GET_CODE (xop0) == SUBREG)
  610         /* We can't just change the mode, because this might clobber op0,
  611            and we will need the original value of op0 if insv fails.  */
  612         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
  613       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
  614         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
  615     }
  616
  617   /* If the destination is a paradoxical subreg such that we need a
  618      truncate to the inner mode, perform the insertion on a temporary and
  619      truncate the result to the original destination.  Note that we can't
  620      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
  621      X) 0)) is (reg:N X).  */
  622   if (GET_CODE (xop0) == SUBREG
  623       && REG_P (SUBREG_REG (xop0))
  624       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
  625                                          op_mode))
  626     {
  627       rtx tem = gen_reg_rtx (op_mode);
  628       emit_move_insn (tem, xop0);
  629       xop0 = tem;
  630       copy_back = true;
  631     }
  632
  633   /* There is a similar overflow check at the start of store_bit_field_1,
  634      but it only handles the case where the field lies completely
  635      outside the register.  The field can also lie only partially in
  636      the register, in which case we need to reduce BITSIZE to the part
  637      that fits.  Without this fix, pr48335-2.c on big-endian breaks on
  638      architectures that support bit-insert instructions, such as arm
  639      and aarch64.  */
  640   if (bitsize + bitnum > unit && bitnum < unit)
  641     {
  642       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
  643                "destination object, data truncated into %wu-bit",
  644                bitsize, unit - bitnum);
  645       bitsize = unit - bitnum;
  646     }
  647
  648   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
  649      "backwards" from the size of the unit we are inserting into.
  650      Otherwise, we count bits from the most significant on a
  651      BYTES/BITS_BIG_ENDIAN machine.  */
  652
  653   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
  654     bitnum = unit - bitsize - bitnum;
  655
  656   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
  657   value1 = value;
  658   if (GET_MODE (value) != op_mode)
  659     {
  660       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
  661         {
  662           rtx tmp;
  663           /* Optimization: Don't bother really extending VALUE
  664              if it has all the bits we will actually use.  However,
  665              if we must narrow it, be sure we do it correctly.  */
  666
  667           if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
  668             {
                    /* VALUE is narrower than OP_MODE: wrap it in a subreg of
                       OP_MODE, forcing it into a register first if the
                       subreg cannot be simplified directly.  */
  669               tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
  670               if (! tmp)
  671                 tmp = simplify_gen_subreg (op_mode,
  672                                            force_reg (GET_MODE (value),
  673                                                       value1),
  674                                            GET_MODE (value), 0);
  675             }
  676           else
  677             {
                    /* VALUE is at least as wide as OP_MODE: take its low
                       part, again via a register if necessary.  */
  678               tmp = gen_lowpart_if_possible (op_mode, value1);
  679               if (! tmp)
  680                 tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value),
  681                                                        value1));
  682             }
  683           value1 = tmp;
  684         }
  685       else if (CONST_INT_P (value))
  686         value1 = gen_int_mode (INTVAL (value), op_mode);
  687       else
  688         /* Parse phase is supposed to make VALUE's data type
  689            match that of the component reference, which is a type
  690            at least as wide as the field; so VALUE should have
  691            a mode that corresponds to that type.  */
  692         gcc_assert (CONSTANT_P (value));
  693     }
  694
        /* Operands of the insertion: destination, field width, bit position
           and the value to insert.  */
  695   create_fixed_operand (&ops[0], xop0);
  696   create_integer_operand (&ops[1], bitsize);
  697   create_integer_operand (&ops[2], bitnum);
  698   create_input_operand (&ops[3], value1, op_mode);
  699   if (maybe_expand_insn (insv->icode, 4, ops))
  700     {
            /* The insertion was done on a temporary; move the result back
               into the original destination.  */
  701       if (copy_back)
  702         convert_move (op0, xop0, true);
  703       return true;
  704     }
  705   delete_insns_since (last);
  706   return false;
  707 }
 708
 709 /* A subroutine of store_bit_field, with the same arguments.  Return true
 710    if the operation could be implemented.
 711
 712    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 713    no other way of implementing the operation.  If FALLBACK_P is false,
 714    return false instead.  */
 715
 716 static bool
 717 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 718                    unsigned HOST_WIDE_INT bitnum,
 719                    unsigned HOST_WIDE_INT bitregion_start,
 720                    unsigned HOST_WIDE_INT bitregion_end,
 721                    machine_mode fieldmode,
 722                    rtx value, bool reverse, bool fallback_p)
 723 {
 724   rtx op0 = str_rtx;
 725   rtx orig_value;
 726
 727   while (GET_CODE (op0) == SUBREG)
 728     {
 729       /* The following line once was done only if WORDS_BIG_ENDIAN,
 730          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 731          meaningful at a much higher level; when structures are copied
 732          between memory and regs, the higher-numbered regs
 733          always get higher addresses.  */
 734       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 735       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 736       int byte_offset = 0;
 737
 738       /* Paradoxical subregs need special handling on big-endian machines.  */
 739       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 740         {
 741           int difference = inner_mode_size - outer_mode_size;
 742
 743           if (WORDS_BIG_ENDIAN)
 744             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 745           if (BYTES_BIG_ENDIAN)
 746             byte_offset += difference % UNITS_PER_WORD;
 747         }
 748       else
 749         byte_offset = SUBREG_BYTE (op0);
 750
 751       bitnum += byte_offset * BITS_PER_UNIT;
 752       op0 = SUBREG_REG (op0);
 753     }
 754
 755   /* No action is needed if the target is a register and if the field
 756      lies completely outside that register.  This can occur if the source
 757      code contains an out-of-bounds access to a small array.  */
 758   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 759     return true;
 760
 761   /* Use vec_set patterns for inserting parts of vectors whenever
 762      available.  */
 763   if (VECTOR_MODE_P (GET_MODE (op0))
 764       && !MEM_P (op0)
 765       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 766       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 767       && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
 768       && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
 769     {
 770       struct expand_operand ops[3];
 771       machine_mode outermode = GET_MODE (op0);
 772       machine_mode innermode = GET_MODE_INNER (outermode);
 773       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 774       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 775
 776       create_fixed_operand (&ops[0], op0);
 777       create_input_operand (&ops[1], value, innermode);
 778       create_integer_operand (&ops[2], pos);
 779       if (maybe_expand_insn (icode, 3, ops))
 780         return true;
 781     }
 782
 783   /* If the target is a register, overwriting the entire object, or storing
 784      a full-word or multi-word field can be done with just a SUBREG.  */
 785   if (!MEM_P (op0)
 786       && bitsize == GET_MODE_BITSIZE (fieldmode)
 787       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 788           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 789     {
 790       /* Use the subreg machinery either to narrow OP0 to the required
 791          words or to cope with mode punning between equal-sized modes.
 792          In the latter case, use subreg on the rhs side, not lhs.  */
 793       rtx sub;
 794
 795       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 796         {
 797           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 798           if (sub)
 799             {
 800               if (reverse)
 801                 sub = flip_storage_order (GET_MODE (op0), sub);
 802               emit_move_insn (op0, sub);
 803               return true;
 804             }
 805         }
 806       else
 807         {
 808           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 809                                      bitnum / BITS_PER_UNIT);
 810           if (sub)
 811             {
 812               if (reverse)
 813                 value = flip_storage_order (fieldmode, value);
 814               emit_move_insn (sub, value);
 815               return true;
 816             }
 817         }
 818     }
 819
 820   /* If the target is memory, storing any naturally aligned field can be
 821      done with a simple store.  For targets that support fast unaligned
 822      memory, any naturally sized, unit aligned field can be done directly.  */
 823   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
 824     {
 825       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 826       if (reverse)
 827         value = flip_storage_order (fieldmode, value);
 828       emit_move_insn (op0, value);
 829       return true;
 830     }
 831
 832   /* Make sure we are playing with integral modes.  Pun with subregs
 833      if we aren't.  This must come after the entire register case above,
 834      since that case is valid for any mode.  The following cases are only
 835      valid for integral modes.  */
 836   {
 837     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 838     if (imode != GET_MODE (op0))
 839       {
 840         if (MEM_P (op0))
 841           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
 842         else
 843           {
 844             gcc_assert (imode != BLKmode);
 845             op0 = gen_lowpart (imode, op0);
 846           }
 847       }
 848   }
 849
 850   /* Storing an lsb-aligned field in a register
 851      can be done with a movstrict instruction.  */
 852
 853   if (!MEM_P (op0)
 854       && !reverse
 855       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 856       && bitsize == GET_MODE_BITSIZE (fieldmode)
 857       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 858     {
 859       struct expand_operand ops[2];
 860       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 861       rtx arg0 = op0;
 862       unsigned HOST_WIDE_INT subreg_off;
 863
 864       if (GET_CODE (arg0) == SUBREG)
 865         {
 866           /* Else we've got some float mode source being extracted into
 867              a different float mode destination -- this combination of
 868              subregs results in Severe Tire Damage.  */
 869           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 870                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 871                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 872           arg0 = SUBREG_REG (arg0);
 873         }
 874
 875       subreg_off = bitnum / BITS_PER_UNIT;
 876       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 877         {
 878           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 879
 880           create_fixed_operand (&ops[0], arg0);
 881           /* Shrink the source operand to FIELDMODE.  */
 882           create_convert_operand_to (&ops[1], value, fieldmode, false);
 883           if (maybe_expand_insn (icode, 2, ops))
 884             return true;
 885         }
 886     }
 887
 888   /* Handle fields bigger than a word.  */
 889
 890   if (bitsize > BITS_PER_WORD)
 891     {
 892       /* Here we transfer the words of the field
 893          in the order least significant first.
 894          This is because the most significant word is the one which may
 895          be less than full.
 896          However, only do that if the value is not BLKmode.  */
 897
 898       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 899       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 900       unsigned int i;
 901       rtx_insn *last;
 902
 903       /* This is the mode we must force value to, so that there will be enough
 904          subwords to extract.  Note that fieldmode will often (always?) be
 905          VOIDmode, because that is what store_field uses to indicate that this
 906          is a bit field, but passing VOIDmode to operand_subword_force
 907          is not allowed.  */
 908       fieldmode = GET_MODE (value);
 909       if (fieldmode == VOIDmode)
 910         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 911
 912       last = get_last_insn ();
 913       for (i = 0; i < nwords; i++)
 914         {
 915           /* If I is 0, use the low-order word in both field and target;
 916              if I is 1, use the next to lowest word; and so on.  */
 917           unsigned int wordnum = (backwards
 918                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 919                                   - i - 1
 920                                   : i);
 921           unsigned int bit_offset = (backwards ^ reverse
 922                                      ? MAX ((int) bitsize - ((int) i + 1)
 923                                             * BITS_PER_WORD,
 924                                             0)
 925                                      : (int) i * BITS_PER_WORD);
 926           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 927           unsigned HOST_WIDE_INT new_bitsize =
 928             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 929
 930           /* If the remaining chunk doesn't have full wordsize we have
 931              to make sure that for big-endian machines the higher order
 932              bits are used.  */
 933           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 934             value_word = simplify_expand_binop (word_mode, lshr_optab,
 935                                                 value_word,
 936                                                 GEN_INT (BITS_PER_WORD
 937                                                          - new_bitsize),
 938                                                 NULL_RTX, true,
 939                                                 OPTAB_LIB_WIDEN);
 940
 941           if (!store_bit_field_1 (op0, new_bitsize,
 942                                   bitnum + bit_offset,
 943                                   bitregion_start, bitregion_end,
 944                                   word_mode,
 945                                   value_word, reverse, fallback_p))
 946             {
 947               delete_insns_since (last);
 948               return false;
 949             }
 950         }
 951       return true;
 952     }
 953
 954   /* If VALUE has a floating-point or complex mode, access it as an
 955      integer of the corresponding size.  This can occur on a machine
 956      with 64 bit registers that uses SFmode for float.  It can also
 957      occur for unaligned float or complex fields.  */
 958   orig_value = value;
 959   if (GET_MODE (value) != VOIDmode
 960       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 961       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 962     {
 963       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 964       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 965     }
 966
 967   /* If OP0 is a multi-word register, narrow it to the affected word.
 968      If the region spans two words, defer to store_split_bit_field.
 969      Don't do this if op0 is a single hard register wider than word
 970      such as a float or vector register.  */
 971   if (!MEM_P (op0)
 972       && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD
 973       && (!REG_P (op0)
 974           || !HARD_REGISTER_P (op0)
 975           || HARD_REGNO_NREGS (REGNO (op0), GET_MODE (op0)) != 1))
 976     {
 977       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
 978         {
 979           if (!fallback_p)
 980             return false;
 981
 982           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 983                                  bitregion_end, value, reverse);
 984           return true;
 985         }
 986       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 987                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 988       gcc_assert (op0);
 989       bitnum %= BITS_PER_WORD;
 990     }
 991
 992   /* From here on we can assume that the field to be stored in fits
 993      within a word.  If the destination is a register, it too fits
 994      in a word.  */
 995
 996   extraction_insn insv;
 997   if (!MEM_P (op0)
 998       && !reverse
 999       && get_best_reg_extraction_insn (&insv, EP_insv,
1000                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1001                                        fieldmode)
1002       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1003     return true;
1004
1005   /* If OP0 is a memory, try copying it to a register and seeing if a
1006      cheap register alternative is available.  */
1007   if (MEM_P (op0) && !reverse)
1008     {
1009       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1010                                         fieldmode)
1011           && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1012         return true;
1013
1014       rtx_insn *last = get_last_insn ();
1015
1016       /* Try loading part of OP0 into a register, inserting the bitfield
1017          into that, and then copying the result back to OP0.  */
1018       unsigned HOST_WIDE_INT bitpos;
1019       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1020                                                bitregion_start, bitregion_end,
1021                                                fieldmode, &bitpos);
1022       if (xop0)
1023         {
1024           rtx tempreg = copy_to_reg (xop0);
1025           if (store_bit_field_1 (tempreg, bitsize, bitpos,
1026                                  bitregion_start, bitregion_end,
1027                                  fieldmode, orig_value, reverse, false))
1028             {
1029               emit_move_insn (xop0, tempreg);
1030               return true;
1031             }
1032           delete_insns_since (last);
1033         }
1034     }
1035
1036   if (!fallback_p)
1037     return false;
1038
1039   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
1040                          bitregion_end, value, reverse);
1041   return true;
1042 }
1043
1044 /* Generate code to store value from rtx VALUE
1045    into a bit-field within structure STR_RTX
1046    containing BITSIZE bits starting at bit BITNUM.
1047
1048    BITREGION_START is bitpos of the first bitfield in this region.
1049    BITREGION_END is the bitpos of the ending bitfield in this region.
1050    These two fields are 0, if the C++ memory model does not apply,
1051    or we are not interested in keeping track of bitfield regions.
1052
1053    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1054
1055    If REVERSE is true, the store is to be done in reverse order.  */
1056
1057 void
1058 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1059                  unsigned HOST_WIDE_INT bitnum,
1060                  unsigned HOST_WIDE_INT bitregion_start,
1061                  unsigned HOST_WIDE_INT bitregion_end,
1062                  machine_mode fieldmode,
1063                  rtx value, bool reverse)
1064 {
1065   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1066   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
1067                                   bitregion_start, bitregion_end))
1068     {
1069       /* Storing of a full word can be done with a simple store.
1070          We know here that the field can be accessed with one single
1071          instruction.  For targets that support unaligned memory,
1072          an unaligned access may be necessary.  */
1073       if (bitsize == GET_MODE_BITSIZE (fieldmode))
1074         {
1075           str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
1076                                              bitnum / BITS_PER_UNIT);
1077           if (reverse)
1078             value = flip_storage_order (fieldmode, value);
1079           gcc_assert (bitnum % BITS_PER_UNIT == 0);
1080           emit_move_insn (str_rtx, value);
1081         }
1082       else
1083         {
1084           rtx temp;
1085
1086           str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
1087                                           &bitnum);
1088           gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
1089           temp = copy_to_reg (str_rtx);
1090           if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1091                                   fieldmode, value, reverse, true))
1092             gcc_unreachable ();
1093
1094           emit_move_insn (str_rtx, temp);
1095         }
1096
1097       return;
1098     }
1099
1100   /* Under the C++0x memory model, we must not touch bits outside the
1101      bit region.  Adjust the address to start at the beginning of the
1102      bit region.  */
1103   if (MEM_P (str_rtx) && bitregion_start > 0)
1104     {
1105       machine_mode bestmode;
1106       HOST_WIDE_INT offset, size;
1107
1108       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1109
1110       offset = bitregion_start / BITS_PER_UNIT;
1111       bitnum -= bitregion_start;
1112       size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1113       bitregion_end -= bitregion_start;
1114       bitregion_start = 0;
1115       bestmode = get_best_mode (bitsize, bitnum,
1116                                 bitregion_start, bitregion_end,
1117                                 MEM_ALIGN (str_rtx), VOIDmode,
1118                                 MEM_VOLATILE_P (str_rtx));
1119       str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1120     }
1121
1122   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1123                           bitregion_start, bitregion_end,
1124                           fieldmode, value, reverse, true))
1125     gcc_unreachable ();
1126 }
1127 \f
1128 /* Use shifts and boolean operations to store VALUE into a bit field of
1129    width BITSIZE in OP0, starting at bit BITNUM.
1130
1131    If REVERSE is true, the store is to be done in reverse order.  */
1132
1133 static void
1134 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1135                        unsigned HOST_WIDE_INT bitnum,
1136                        unsigned HOST_WIDE_INT bitregion_start,
1137                        unsigned HOST_WIDE_INT bitregion_end,
1138                        rtx value, bool reverse)
1139 {
1140   /* There is a case not handled here:
1141      a structure with a known alignment of just a halfword
1142      and a field split across two aligned halfwords within the structure.
1143      Or likewise a structure with a known alignment of just a byte
1144      and a field split across two bytes.
1145      Such cases are not supposed to be able to occur.  */
1146
1147   if (MEM_P (op0))
1148     {
1149       machine_mode mode = GET_MODE (op0);
1150       if (GET_MODE_BITSIZE (mode) == 0
1151           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1152         mode = word_mode;
1153       mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1154                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1155
1156       if (mode == VOIDmode)
1157         {
1158           /* The only way this should occur is if the field spans word
1159              boundaries.  */
1160           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1161                                  bitregion_end, value, reverse);
1162           return;
1163         }
1164
1165       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1166     }
1167
1168   store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
1169 }
1170
1171 /* Helper function for store_fixed_bit_field, stores
1172    the bit field always using the MODE of OP0.  */
1173
1174 static void
1175 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1176                          unsigned HOST_WIDE_INT bitnum,
1177                          rtx value, bool reverse)
1178 {
1179   machine_mode mode;
1180   rtx temp;
1181   int all_zero = 0;
1182   int all_one = 0;
1183
1184   mode = GET_MODE (op0);
1185   gcc_assert (SCALAR_INT_MODE_P (mode));
1186
1187   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1188      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1189
1190   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1191     /* BITNUM is the distance between our msb
1192        and that of the containing datum.
1193        Convert it to the distance from the lsb.  */
1194     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1195
1196   /* Now BITNUM is always the distance between our lsb
1197      and that of OP0.  */
1198
1199   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1200      we must first convert its mode to MODE.  */
1201
1202   if (CONST_INT_P (value))
1203     {
1204       unsigned HOST_WIDE_INT v = UINTVAL (value);
1205
1206       if (bitsize < HOST_BITS_PER_WIDE_INT)
1207         v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1208
1209       if (v == 0)
1210         all_zero = 1;
1211       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1212                 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1213                || (bitsize == HOST_BITS_PER_WIDE_INT
1214                    && v == HOST_WIDE_INT_M1U))
1215         all_one = 1;
1216
1217       value = lshift_value (mode, v, bitnum);
1218     }
1219   else
1220     {
1221       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1222                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1223
1224       if (GET_MODE (value) != mode)
1225         value = convert_to_mode (mode, value, 1);
1226
1227       if (must_and)
1228         value = expand_binop (mode, and_optab, value,
1229                               mask_rtx (mode, 0, bitsize, 0),
1230                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1231       if (bitnum > 0)
1232         value = expand_shift (LSHIFT_EXPR, mode, value,
1233                               bitnum, NULL_RTX, 1);
1234     }
1235
1236   if (reverse)
1237     value = flip_storage_order (mode, value);
1238
1239   /* Now clear the chosen bits in OP0,
1240      except that if VALUE is -1 we need not bother.  */
1241   /* We keep the intermediates in registers to allow CSE to combine
1242      consecutive bitfield assignments.  */
1243
1244   temp = force_reg (mode, op0);
1245
1246   if (! all_one)
1247     {
1248       rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1249       if (reverse)
1250         mask = flip_storage_order (mode, mask);
1251       temp = expand_binop (mode, and_optab, temp, mask,
1252                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1253       temp = force_reg (mode, temp);
1254     }
1255
1256   /* Now logical-or VALUE into OP0, unless it is zero.  */
1257
1258   if (! all_zero)
1259     {
1260       temp = expand_binop (mode, ior_optab, temp, value,
1261                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1262       temp = force_reg (mode, temp);
1263     }
1264
1265   if (op0 != temp)
1266     {
1267       op0 = copy_rtx (op0);
1268       emit_move_insn (op0, temp);
1269     }
1270 }
1271 \f
1272 /* Store a bit field that is split across multiple accessible memory objects.
1273
1274    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1275    BITSIZE is the field width; BITPOS the position of its first bit
1276    (within the word).
1277    VALUE is the value to store.
1278
1279    If REVERSE is true, the store is to be done in reverse order.
1280
1281    This does not yet handle fields wider than BITS_PER_WORD.  */
1282
1283 static void
1284 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1285                        unsigned HOST_WIDE_INT bitpos,
1286                        unsigned HOST_WIDE_INT bitregion_start,
1287                        unsigned HOST_WIDE_INT bitregion_end,
1288                        rtx value, bool reverse)
1289 {
1290   unsigned int unit, total_bits, bitsdone = 0;
1291
1292   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1293      much at a time.  */
1294   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1295     unit = BITS_PER_WORD;
1296   else
1297     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1298
1299   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1300      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1301      again, and we will mutually recurse forever.  */
1302   if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1303     unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1304
1305   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1306      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1307      that VALUE might be a floating-point constant.  */
1308   if (CONSTANT_P (value) && !CONST_INT_P (value))
1309     {
1310       rtx word = gen_lowpart_common (word_mode, value);
1311
1312       if (word && (value != word))
1313         value = word;
1314       else
1315         value = gen_lowpart_common (word_mode,
1316                                     force_reg (GET_MODE (value) != VOIDmode
1317                                                ? GET_MODE (value)
1318                                                : word_mode, value));
1319     }
1320
1321   total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1322
1323   while (bitsdone < bitsize)
1324     {
1325       unsigned HOST_WIDE_INT thissize;
1326       unsigned HOST_WIDE_INT thispos;
1327       unsigned HOST_WIDE_INT offset;
1328       rtx part, word;
1329
1330       offset = (bitpos + bitsdone) / unit;
1331       thispos = (bitpos + bitsdone) % unit;
1332
1333       /* When region of bytes we can touch is restricted, decrease
1334          UNIT close to the end of the region as needed.  If op0 is a REG
1335          or SUBREG of REG, don't do this, as there can't be data races
1336          on a register and we can expand shorter code in some cases.  */
1337       if (bitregion_end
1338           && unit > BITS_PER_UNIT
1339           && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1340           && !REG_P (op0)
1341           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1342         {
1343           unit = unit / 2;
1344           continue;
1345         }
1346
1347       /* THISSIZE must not overrun a word boundary.  Otherwise,
1348          store_fixed_bit_field will call us again, and we will mutually
1349          recurse forever.  */
1350       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1351       thissize = MIN (thissize, unit - thispos);
1352
1353       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1354         {
1355           /* Fetch successively less significant portions.  */
1356           if (CONST_INT_P (value))
1357             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1358                              >> (bitsize - bitsdone - thissize))
1359                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1360           /* Likewise, but the source is little-endian.  */
1361           else if (reverse)
1362             part = extract_fixed_bit_field (word_mode, value, thissize,
1363                                             bitsize - bitsdone - thissize,
1364                                             NULL_RTX, 1, false);
1365           else
1366             {
1367               int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1368               /* The args are chosen so that the last part includes the
1369                  lsb.  Give extract_bit_field the value it needs (with
1370                  endianness compensation) to fetch the piece we want.  */
1371               part = extract_fixed_bit_field (word_mode, value, thissize,
1372                                               total_bits - bitsize + bitsdone,
1373                                               NULL_RTX, 1, false);
1374             }
1375         }
1376       else
1377         {
1378           /* Fetch successively more significant portions.  */
1379           if (CONST_INT_P (value))
1380             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1381                              >> bitsdone)
1382                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1383           /* Likewise, but the source is big-endian.  */
1384           else if (reverse)
1385             part = extract_fixed_bit_field (word_mode, value, thissize,
1386                                             total_bits - bitsdone - thissize,
1387                                             NULL_RTX, 1, false);
1388           else
1389             part = extract_fixed_bit_field (word_mode, value, thissize,
1390                                             bitsdone, NULL_RTX, 1, false);
1391         }
1392
1393       /* If OP0 is a register, then handle OFFSET here.  */
1394       if (SUBREG_P (op0) || REG_P (op0))
1395         {
1396           machine_mode op0_mode = GET_MODE (op0);
1397           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1398             word = offset ? const0_rtx : op0;
1399           else
1400             word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1401                                           GET_MODE (op0));
1402           offset &= BITS_PER_WORD / unit - 1;
1403         }
1404       else
1405         word = op0;
1406
1407       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1408          it is just an out-of-bounds access.  Ignore it.  */
1409       if (word != const0_rtx)
1410         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1411                                bitregion_start, bitregion_end, part,
1412                                reverse);
1413       bitsdone += thissize;
1414     }
1415 }
1416 \f
1417 /* A subroutine of extract_bit_field_1 that converts return value X
1418    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1419    to extract_bit_field.  */
1420
1421 static rtx
1422 convert_extracted_bit_field (rtx x, machine_mode mode,
1423                              machine_mode tmode, bool unsignedp)
1424 {
1425   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1426     return x;
1427
1428   /* If the x mode is not a scalar integral, first convert to the
1429      integer mode of that size and then access it as a floating-point
1430      value via a SUBREG.  */
1431   if (!SCALAR_INT_MODE_P (tmode))
1432     {
1433       machine_mode smode;
1434
1435       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1436       x = convert_to_mode (smode, x, unsignedp);
1437       x = force_reg (smode, x);
1438       return gen_lowpart (tmode, x);
1439     }
1440
1441   return convert_to_mode (tmode, x, unsignedp);
1442 }
1443
1444 /* Try to use an ext(z)v pattern to extract a field from OP0.
1445    Return the extracted value on success, otherwise return null.
1446    EXT_MODE is the mode of the extraction and the other arguments
1447    are as for extract_bit_field.  */
1448
1449 static rtx
1450 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1451                               unsigned HOST_WIDE_INT bitsize,
1452                               unsigned HOST_WIDE_INT bitnum,
1453                               int unsignedp, rtx target,
1454                               machine_mode mode, machine_mode tmode)
1455 {
1456   struct expand_operand ops[4];
1457   rtx spec_target = target;
1458   rtx spec_target_subreg = 0;
1459   machine_mode ext_mode = extv->field_mode;
1460   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1461
1462   if (bitsize == 0 || unit < bitsize)
1463     return NULL_RTX;
1464
1465   if (MEM_P (op0))
1466     /* Get a reference to the first byte of the field.  */
1467     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1468                                 &bitnum);
1469   else
1470     {
1471       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1472       if (BYTES_BIG_ENDIAN)
1473         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1474
1475       /* If op0 is a register, we need it in EXT_MODE to make it
1476          acceptable to the format of ext(z)v.  */
1477       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1478         return NULL_RTX;
1479       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1480         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1481     }
1482
1483   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1484      "backwards" from the size of the unit we are extracting from.
1485      Otherwise, we count bits from the most significant on a
1486      BYTES/BITS_BIG_ENDIAN machine.  */
1487
1488   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1489     bitnum = unit - bitsize - bitnum;
1490
1491   if (target == 0)
1492     target = spec_target = gen_reg_rtx (tmode);
1493
1494   if (GET_MODE (target) != ext_mode)
1495     {
1496       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1497          between the mode of the extraction (word_mode) and the target
1498          mode.  Instead, create a temporary and use convert_move to set
1499          the target.  */
1500       if (REG_P (target)
1501           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1502         {
1503           target = gen_lowpart (ext_mode, target);
1504           if (GET_MODE_PRECISION (ext_mode)
1505               > GET_MODE_PRECISION (GET_MODE (spec_target)))
1506             spec_target_subreg = target;
1507         }
1508       else
1509         target = gen_reg_rtx (ext_mode);
1510     }
1511
1512   create_output_operand (&ops[0], target, ext_mode);
1513   create_fixed_operand (&ops[1], op0);
1514   create_integer_operand (&ops[2], bitsize);
1515   create_integer_operand (&ops[3], bitnum);
1516   if (maybe_expand_insn (extv->icode, 4, ops))
1517     {
1518       target = ops[0].value;
1519       if (target == spec_target)
1520         return target;
1521       if (target == spec_target_subreg)
1522         return spec_target;
1523       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1524     }
1525   return NULL_RTX;
1526 }
1527
1528 /* A subroutine of extract_bit_field, with the same arguments.
1529    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1530    if we can find no other means of implementing the operation.
1531    If FALLBACK_P is false, return NULL instead.

        The following strategies are tried in order, after stripping any
        SUBREGs from STR_RTX and folding their byte offsets into BITNUM:

        - an out-of-bounds access to a register yields a fresh pseudo of
          mode TMODE;
        - extracting a whole register in its own mode returns the register
          (storage order flipped if REVERSE);
        - a field lying within one element of a non-memory vector uses the
          vec_extract pattern, if there is one;
        - a full MODE1 value at a suitable position is taken with a subreg
          (register) or with a plain load (memory);
        - a field wider than a word is assembled word by word through
          recursive calls and then zero- or sign-extended;
        - a field in a multi-word register is narrowed to one word, or
          passed to extract_split_bit_field if it spans two words (only
          when FALLBACK_P);
        - the extv/extzv patterns are tried, for memory also on a register
          copy of the containing unit;
        - finally, if FALLBACK_P, extract_fixed_bit_field does the job with
          shifts and masks.  */
1532
1533 static rtx
1534 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1535                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1536                      machine_mode mode, machine_mode tmode,
1537                      bool reverse, bool fallback_p)
1538 {
1539   rtx op0 = str_rtx;
1540   machine_mode int_mode;
1541   machine_mode mode1;
1542
1543   if (tmode == VOIDmode)
1544     tmode = mode;
1545
1546   while (GET_CODE (op0) == SUBREG)
1547     {
1548       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1549       op0 = SUBREG_REG (op0);
1550     }
1551
1552   /* If we have an out-of-bounds access to a register, just return an
1553      uninitialized register of the required mode.  This can occur if the
1554      source code contains an out-of-bounds access to a small array.  */
1555   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1556     return gen_reg_rtx (tmode);
1557
1558   if (REG_P (op0)
1559       && mode == GET_MODE (op0)
1560       && bitnum == 0
1561       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1562     {
1563       if (reverse)
1564         op0 = flip_storage_order (mode, op0);
1565       /* We're trying to extract a full register from itself.  */
1566       return op0;
1567     }
1568
1569   /* See if we can get a better vector mode before extracting.  */
1570   if (VECTOR_MODE_P (GET_MODE (op0))
1571       && !MEM_P (op0)
1572       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1573     {
1574       machine_mode new_mode;
1575
1576       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1577         new_mode = MIN_MODE_VECTOR_FLOAT;
1578       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1579         new_mode = MIN_MODE_VECTOR_FRACT;
1580       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1581         new_mode = MIN_MODE_VECTOR_UFRACT;
1582       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1583         new_mode = MIN_MODE_VECTOR_ACCUM;
1584       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1585         new_mode = MIN_MODE_VECTOR_UACCUM;
1586       else
1587         new_mode = MIN_MODE_VECTOR_INT;
1588
1589       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1590         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1591             && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode)
1592             && targetm.vector_mode_supported_p (new_mode))
1593           break;
1594       if (new_mode != VOIDmode)
1595         op0 = gen_lowpart (new_mode, op0);
1596     }
1597
1598   /* Use vec_extract patterns for extracting parts of vectors whenever
1599      available.  The last test checks that the first and last bit of the
             field fall in the same vector element.  */
1600   if (VECTOR_MODE_P (GET_MODE (op0))
1601       && !MEM_P (op0)
1602       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1603       && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1604           == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1605     {
1606       struct expand_operand ops[3];
1607       machine_mode outermode = GET_MODE (op0);
1608       machine_mode innermode = GET_MODE_INNER (outermode);
1609       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1610       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1611
1612       create_output_operand (&ops[0], target, innermode);
1613       create_input_operand (&ops[1], op0, outermode);
1614       create_integer_operand (&ops[2], pos);
1615       if (maybe_expand_insn (icode, 3, ops))
1616         {
1617           target = ops[0].value;
1618           if (GET_MODE (target) != mode)
1619             return gen_lowpart (tmode, target);
1620           return target;
1621         }
1622     }
1623
1624   /* Make sure we are playing with integral modes.  Pun with subregs
1625      if we aren't.  */
1626   {
1627     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1628     if (imode != GET_MODE (op0))
1629       {
1630         if (MEM_P (op0))
1631           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1632         else if (imode != BLKmode)
1633           {
1634             op0 = gen_lowpart (imode, op0);
1635
1636             /* If we got a SUBREG, force it into a register since we
1637                aren't going to be able to do another SUBREG on it.  */
1638             if (GET_CODE (op0) == SUBREG)
1639               op0 = force_reg (imode, op0);
1640           }
1641         else
1642           {
                   /* No integer mode of this size: copy the value to a stack
                      temporary and continue with a BLKmode memory reference.  */
1643             HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1644             rtx mem = assign_stack_temp (GET_MODE (op0), size);
1645             emit_move_insn (mem, op0);
1646             op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1647           }
1648       }
1649   }
1650
1651   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1652      If that's wrong, the solution is to test for it and set TARGET to 0
1653      if needed.  */
1654
1655   /* Get the mode of the field to use for atomic access or subreg
1656      conversion.  */
1657   mode1 = mode;
1658   if (SCALAR_INT_MODE_P (tmode))
1659     {
1660       machine_mode try_mode = mode_for_size (bitsize,
1661                                                   GET_MODE_CLASS (tmode), 0);
1662       if (try_mode != BLKmode)
1663         mode1 = try_mode;
1664     }
1665   gcc_assert (mode1 != BLKmode);
1666
1667   /* Extraction of a full MODE1 value can be done with a subreg as long
1668      as the least significant bit of the value is the least significant
1669      bit of either OP0 or a word of OP0.  */
1670   if (!MEM_P (op0)
1671       && !reverse
1672       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1673       && bitsize == GET_MODE_BITSIZE (mode1)
1674       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1675     {
1676       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1677                                      bitnum / BITS_PER_UNIT);
1678       if (sub)
1679         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1680     }
1681
1682   /* Extraction of a full MODE1 value can be done with a load as long as
1683      the field is on a byte boundary and is sufficiently aligned.  */
1684   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1685     {
1686       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1687       if (reverse)
1688         op0 = flip_storage_order (mode1, op0);
1689       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1690     }
1691
1692   /* Handle fields bigger than a word.  */
1693
1694   if (bitsize > BITS_PER_WORD)
1695     {
1696       /* Here we transfer the words of the field
1697          in the order least significant first.
1698          This is because the most significant word is the one which may
1699          be less than full.  */
1700
1701       const bool backwards = WORDS_BIG_ENDIAN;
1702       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1703       unsigned int i;
1704       rtx_insn *last;
1705
1706       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1707         target = gen_reg_rtx (mode);
1708
1709       /* In case we're about to clobber a base register or something
1710          (see gcc.c-torture/execute/20040625-1.c).   */
1711       if (reg_mentioned_p (target, str_rtx))
1712         target = gen_reg_rtx (mode);
1713
1714       /* Indicate for flow that the entire target reg is being set.  */
1715       emit_clobber (target);
1716
             /* Remember where the per-word sequence starts so that it can be
                discarded if one of the words cannot be extracted.  */
1717       last = get_last_insn ();
1718       for (i = 0; i < nwords; i++)
1719         {
1720           /* If I is 0, use the low-order word in both field and target;
1721              if I is 1, use the next to lowest word; and so on.  */
1722           /* Word number in TARGET to use.  */
1723           unsigned int wordnum
1724             = (backwards
1725                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1726                : i);
1727           /* Offset from start of field in OP0.  */
1728           unsigned int bit_offset = (backwards ^ reverse
1729                                      ? MAX ((int) bitsize - ((int) i + 1)
1730                                             * BITS_PER_WORD,
1731                                             0)
1732                                      : (int) i * BITS_PER_WORD);
1733           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1734           rtx result_part
1735             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1736                                              bitsize - i * BITS_PER_WORD),
1737                                    bitnum + bit_offset, 1, target_part,
1738                                    mode, word_mode, reverse, fallback_p);
1739
1740           gcc_assert (target_part);
1741           if (!result_part)
1742             {
1743               delete_insns_since (last);
1744               return NULL;
1745             }
1746
1747           if (result_part != target_part)
1748             emit_move_insn (target_part, result_part);
1749         }
1750
1751       if (unsignedp)
1752         {
1753           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1754              need to be zero'd out.  */
1755           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1756             {
1757               unsigned int i, total_words;
1758
1759               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1760               for (i = nwords; i < total_words; i++)
1761                 emit_move_insn
1762                   (operand_subword (target,
1763                                     backwards ? total_words - i - 1 : i,
1764                                     1, VOIDmode),
1765                    const0_rtx);
1766             }
1767           return target;
1768         }
1769
1770       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1771       target = expand_shift (LSHIFT_EXPR, mode, target,
1772                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1773       return expand_shift (RSHIFT_EXPR, mode, target,
1774                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1775     }
1776
1777   /* If OP0 is a multi-word register, narrow it to the affected word.
1778      If the region spans two words, defer to extract_split_bit_field.  */
1779   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1780     {
1781       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1782         {
1783           if (!fallback_p)
1784             return NULL_RTX;
1785           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1786                                             reverse);
1787           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1788         }
1789       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1790                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1791       bitnum %= BITS_PER_WORD;
1792     }
1793
1794   /* From here on we know the desired field is smaller than a word.
1795      If OP0 is a register, it too fits within a word.  */
1796   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1797   extraction_insn extv;
1798   if (!MEM_P (op0)
1799       && !reverse
1800       /* ??? We could limit the structure size to the part of OP0 that
1801          contains the field, with appropriate checks for endianness
1802          and TRULY_NOOP_TRUNCATION.  */
1803       && get_best_reg_extraction_insn (&extv, pattern,
1804                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1805                                        tmode))
1806     {
1807       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1808                                                  unsignedp, target, mode,
1809                                                  tmode);
1810       if (result)
1811         return result;
1812     }
1813
1814   /* If OP0 is a memory, try copying it to a register and seeing if a
1815      cheap register alternative is available.  */
1816   if (MEM_P (op0) && !reverse)
1817     {
1818       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1819                                         tmode))
1820         {
1821           rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1822                                                      bitnum, unsignedp,
1823                                                      target, mode,
1824                                                      tmode);
1825           if (result)
1826             return result;
1827         }
1828
1829       rtx_insn *last = get_last_insn ();
1830
1831       /* Try loading part of OP0 into a register and extracting the
1832          bitfield from that.  */
1833       unsigned HOST_WIDE_INT bitpos;
1834       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1835                                                0, 0, tmode, &bitpos);
1836       if (xop0)
1837         {
1838           xop0 = copy_to_reg (xop0);
                 /* FALLBACK_P is false here so that a failed attempt returns
                    NULL and the register copy can be deleted again.  */
1839           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1840                                             unsignedp, target,
1841                                             mode, tmode, reverse, false);
1842           if (result)
1843             return result;
1844           delete_insns_since (last);
1845         }
1846     }
1847
1848   if (!fallback_p)
1849     return NULL;
1850
1851   /* Find a correspondingly-sized integer field, so we can apply
1852      shifts and masks to it.  */
1853   int_mode = int_mode_for_mode (tmode);
1854   if (int_mode == BLKmode)
1855     int_mode = int_mode_for_mode (mode);
1856   /* Should probably push op0 out to memory and then do a load.  */
1857   gcc_assert (int_mode != BLKmode);
1858
1859   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target,
1860                                     unsignedp, reverse);
1861
1862   /* Complex values must be reversed piecewise, so we need to undo the global
1863      reversal, convert to the complex mode and reverse again.  */
1864   if (reverse && COMPLEX_MODE_P (tmode))
1865     {
1866       target = flip_storage_order (int_mode, target);
1867       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1868       target = flip_storage_order (tmode, target);
1869     }
1870   else
1871     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1872
1873   return target;
1874 }
1875
1876 /* Emit code that reads the bit-field of BITSIZE bits starting at bit
1877    BITNUM of STR_RTX, and return an rtx for where the value ends up.
1878
1879    STR_RTX is the structure holding the field (a REG or MEM).
1880    UNSIGNEDP is nonzero if the field is unsigned.
1881    MODE is the natural mode of the extracted value.
1882    TMODE is the mode the caller would prefer, although the value may
1883    be returned in MODE instead.
1884
1885    If REVERSE is true, the extraction is done in reverse order.
1886
1887    TARGET, if nonzero, is a suggested place for the value: when storing
1888    there costs nothing extra we do so and return TARGET.  Otherwise the
1889    result is a REG of mode TMODE or MODE, TMODE being preferred when the
1890    two are equally easy.
1891
1892    Apart from the strict-volatile case, extract_bit_field_1 does the work.  */
1893
1894 rtx
1895 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1896                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1897                    machine_mode mode, machine_mode tmode, bool reverse)
1898 {
1899   /* Mode for the -fstrict-volatile-bitfields test: that of STR_RTX if
1900      it has a size, else that of TARGET if it has one, else TMODE.  */
1901   machine_mode access_mode = tmode;
1902   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1903     access_mode = GET_MODE (str_rtx);
1904   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1905     access_mode = GET_MODE (target);
1906
1907   /* Anything that is not a strict volatile bit-field goes straight on.  */
1908   if (!strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, access_mode,
1909                                    0, 0))
1910     return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1911                                 target, mode, tmode, reverse, true);
1912
1913   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1914   if (bitsize == GET_MODE_BITSIZE (access_mode))
1915     {
1916       /* A full ACCESS_MODE value needs only a simple load; on targets
1917          that support unaligned memory it may be an unaligned one.  */
1918       rtx loaded = adjust_bitfield_address (str_rtx, access_mode,
1919                                             bitnum / BITS_PER_UNIT);
1920       if (reverse)
1921         loaded = flip_storage_order (access_mode, loaded);
1922       gcc_assert (bitnum % BITS_PER_UNIT == 0);
1923       return convert_extracted_bit_field (loaded, mode, tmode, unsignedp);
1924     }
1925
1926   /* Otherwise narrow the reference to the ACCESS_MODE unit holding the
1927      field, load that unit into a register and extract from there.  */
1928   str_rtx = narrow_bit_field_mem (str_rtx, access_mode, bitsize, bitnum,
1929                                   &bitnum);
1930   gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (access_mode));
1931   str_rtx = copy_to_reg (str_rtx);
1932   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1933                               target, mode, tmode, reverse, true);
     }
1934 \f
1935 /* Extract the field of BITSIZE bits at bit BITNUM of OP0 by means of
1936    shifts and boolean operations.
1937
1938    UNSIGNEDP is nonzero for an unsigned field (the value is then not
1939    sign-extended).  If REVERSE is true, extract in reverse order.
1940
1941    If TARGET is nonzero we try to leave the value there and return
1942    TARGET, without guaranteeing it.  If TARGET is not used, a pseudo-reg
1943    of mode TMODE is created for the value.  */
1944
1945 static rtx
1946 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1947                          unsigned HOST_WIDE_INT bitsize,
1948                          unsigned HOST_WIDE_INT bitnum, rtx target,
1949                          int unsignedp, bool reverse)
1950 {
1951   /* Anything but a MEM is extracted directly in its own mode.  */
1952   if (!MEM_P (op0))
1953     return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1954                                       target, unsignedp, reverse);
1955
1956   /* Let get_best_mode choose the mode for the memory access.  */
1957   machine_mode access_mode
1958     = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1959                      MEM_VOLATILE_P (op0));
1960
1961   /* The only way no mode should be found is if the field spans word
1962      boundaries; extract it in pieces then.  */
1963   if (access_mode == VOIDmode)
1964     return extract_split_bit_field (op0, bitsize, bitnum, unsignedp, reverse);
1965
1966   op0 = narrow_bit_field_mem (op0, access_mode, bitsize, bitnum, &bitnum);
1967   return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1968                                     target, unsignedp, reverse);
     }
1969
1970 /* Helper function for extract_fixed_bit_field, extracts
1971    the bit field always using the MODE of OP0.

        OP0 must have a scalar integer mode (this is asserted).  BITSIZE and
        BITNUM give the width and position of the field in OP0; UNSIGNEDP and
        REVERSE are as for extract_fixed_bit_field.  TARGET is only a
        suggestion for where to put the result.

        An unsigned field is shifted down to bit 0 if necessary, converted
        to TMODE and masked unless it already reached the msb of OP0.
        A signed field is shifted up to the msb and then arithmetically
        shifted down again, in the narrowest MODE_INT mode that holds
        BITSIZE + BITNUM bits; the result then has that mode, which need
        not be TMODE.  */
1972
1973 static rtx
1974 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1975                            unsigned HOST_WIDE_INT bitsize,
1976                            unsigned HOST_WIDE_INT bitnum, rtx target,
1977                            int unsignedp, bool reverse)
1978 {
1979   machine_mode mode = GET_MODE (op0);
1980   gcc_assert (SCALAR_INT_MODE_P (mode));
1981
1982   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1983      for invalid input, such as extract equivalent of f5 from
1984      gcc.dg/pr48335-2.c.  */
1985
1986   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1987     /* BITNUM is the distance between our msb and that of OP0.
1988        Convert it to the distance from the lsb.  */
1989     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1990
1991   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1992      We have reduced the big-endian case to the little-endian case.  */
1993   if (reverse)
1994     op0 = flip_storage_order (mode, op0);
1995
1996   if (unsignedp)
1997     {
1998       if (bitnum)
1999         {
2000           /* If the field does not already start at the lsb,
2001              shift it so it does.  */
2002           /* Maybe propagate the target for the shift.  */
2003           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
                 /* The shift is done in MODE, so TARGET is only usable for it
                    when TMODE is that same mode.  */
2004           if (tmode != mode)
2005             subtarget = 0;
2006           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2007         }
2008       /* Convert the value to the desired mode.  */
2009       if (mode != tmode)
2010         op0 = convert_to_mode (tmode, op0, 1);
2011
2012       /* Unless the msb of the field used to be the msb when we shifted,
2013          mask out the upper bits.  */
2014
2015       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2016         return expand_binop (GET_MODE (op0), and_optab, op0,
2017                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
2018                              target, 1, OPTAB_LIB_WIDEN);
2019       return op0;
2020     }
2021
2022   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2023      then arithmetic-shift its lsb to the lsb of the word.  */
2024   op0 = force_reg (mode, op0);
2025
2026   /* Find the narrowest integer mode that contains the field.  */
2027
2028   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
2029        mode = GET_MODE_WIDER_MODE (mode))
2030     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
2031       {
2032         op0 = convert_to_mode (mode, op0, 0);
2033         break;
2034       }
2035
       /* TARGET can only receive the result if it is wanted in the mode the
          shifts are carried out in.  */
2036   if (mode != tmode)
2037     target = 0;
2038
2039   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2040     {
2041       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2042       /* Maybe propagate the target for the shift.  */
2043       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2044       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2045     }
2046
       /* The final shift is arithmetic (UNSIGNEDP argument 0), which
          replicates the field's sign bit into the upper bits.  */
2047   return expand_shift (RSHIFT_EXPR, mode, op0,
2048                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2049 }
2050
2051 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2052    VALUE << BITPOS.

        The shift is computed with wi::lshift, and immed_wide_int_const
        turns the result into an rtx constant for mode MODE.  */
2053
2054 static rtx
2055 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2056               int bitpos)
2057 {
2058   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2059 }
2060 \f
2061 /* Extract a bit field that is split across two words
2062    and return an RTX for the result.
2063
2064    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2065    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2066    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2067
2068    If REVERSE is true, the extraction is to be done in reverse order.

        The field is read in pieces, none of which crosses a UNIT boundary.
        Each piece is extracted as an unsigned word_mode value with
        extract_fixed_bit_field, shifted into position and IORed into the
        result, which is therefore a word_mode value.  For a signed field
        the combined value is finally sign-extended with a left shift
        followed by an arithmetic right shift.  */
2069
2070 static rtx
2071 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2072                          unsigned HOST_WIDE_INT bitpos, int unsignedp,
2073                          bool reverse)
2074 {
2075   unsigned int unit;
2076   unsigned int bitsdone = 0;   /* Number of field bits extracted so far.  */
2077   rtx result = NULL_RTX;
2078   int first = 1;               /* Nonzero until RESULT holds a first part.  */
2079
2080   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2081      much at a time.  */
2082   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2083     unit = BITS_PER_WORD;
2084   else
2085     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2086
2087   while (bitsdone < bitsize)
2088     {
2089       unsigned HOST_WIDE_INT thissize;
2090       rtx part, word;
2091       unsigned HOST_WIDE_INT thispos;
2092       unsigned HOST_WIDE_INT offset;
2093
             /* Locate the next unextracted bit: OFFSET counts whole UNITs and
                THISPOS is the bit position within that UNIT.  */
2094       offset = (bitpos + bitsdone) / unit;
2095       thispos = (bitpos + bitsdone) % unit;
2096
2097       /* THISSIZE must not overrun a word boundary.  Otherwise,
2098          extract_fixed_bit_field will call us again, and we will mutually
2099          recurse forever.  */
2100       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2101       thissize = MIN (thissize, unit - thispos);
2102
2103       /* If OP0 is a register, then handle OFFSET here.  */
2104       if (SUBREG_P (op0) || REG_P (op0))
2105         {
2106           word = operand_subword_force (op0, offset, GET_MODE (op0));
2107           offset = 0;
2108         }
2109       else
2110         word = op0;
2111
2112       /* Extract the parts in bit-counting order,
2113          whose meaning is determined by BYTES_PER_UNIT.
2114          OFFSET is in UNITs, and UNIT is in bits.  */
2115       part = extract_fixed_bit_field (word_mode, word, thissize,
2116                                       offset * unit + thispos, 0, 1, reverse);
2117       bitsdone += thissize;
2118
2119       /* Shift this part into place for the result.  */
2120       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2121         {
                 /* This part sits above the BITSIZE - BITSDONE bits that are
                    still to be extracted.  */
2122           if (bitsize != bitsdone)
2123             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2124                                  bitsize - bitsdone, 0, 1);
2125         }
2126       else
2127         {
                 /* This part sits above the BITSDONE - THISSIZE bits that were
                    extracted before it.  */
2128           if (bitsdone != thissize)
2129             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2130                                  bitsdone - thissize, 0, 1);
2131         }
2132
2133       if (first)
2134         result = part;
2135       else
2136         /* Combine the parts with bitwise or.  This works
2137            because we extracted each part as an unsigned bit field.  */
2138         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2139                                OPTAB_LIB_WIDEN);
2140
2141       first = 0;
2142     }
2143
2144   /* Unsigned bit field: we are done.  */
2145   if (unsignedp)
2146     return result;
2147   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2148   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2149                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2150   return expand_shift (RSHIFT_EXPR, word_mode, result,
2151                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2152 }
2153 \f
2154 /* Try to produce the low bits of SRC, whose mode is SRC_MODE, as an
2155    rvalue of mode MODE with the bit pattern preserved.  When SRC_MODE is
2156    smaller than MODE the upper bits are filled with zeros.  The attempt
2157    fails if the layout of either mode is unknown (as for CC modes) or if
2158    it would involve unprofitable mode punning.  Return the value on
2159    success and null on failure.
2160
2161    This differs from the gen_lowpart* routines as follows:
2162
2163      - the value returned must always be treated as an rvalue
2164
2165      - a MODE wider than SRC_MODE implies a zero extension
2166
2167      - a MODE smaller than SRC_MODE implies a truncation, which is
2168        therefore subject to TRULY_NOOP_TRUNCATION.
2169
2170    Put differently, this routine performs a computation, while the
2171    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2172    operations.  */
2173
2174
2175 rtx
2176 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2177 {
2178   /* Nothing to do when the modes already agree.  */
2179   if (mode == src_mode)
2180     return src;
2181
2182   if (CONSTANT_P (src))
2183     {
2184       /* Do not use simplify_gen_subreg: if simplify_subreg fails it will
2185          happily create (subreg (symbol_ref)) or similar invalid SUBREGs.  */
2186       unsigned int low_byte = subreg_lowpart_offset (mode, src_mode);
2187       rtx folded = simplify_subreg (mode, src, src_mode, low_byte);
2188       if (folded)
2189         return folded;
2190
2191       /* Otherwise take a SUBREG of a register copy, if that is valid.  */
2192       if (GET_MODE (src) != VOIDmode
2193           && validate_subreg (mode, src_mode, src, low_byte))
2194         {
2195           src = force_reg (GET_MODE (src), src);
2196           return gen_rtx_SUBREG (mode, src, low_byte);
2197         }
2198       return NULL_RTX;
2199     }
2200
2201   /* Give up on CC modes.  */
2202   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2203     return NULL_RTX;
2204
2205   /* Modes of equal size that are tieable may be punned directly.  */
2206   bool same_size = GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode);
2207   if (same_size && MODES_TIEABLE_P (mode, src_mode))
2208     {
2209       rtx punned = gen_lowpart_common (mode, src);
2210       if (punned)
2211         return punned;
2212     }
2213
2214   /* Otherwise go by way of the integer modes matching each mode.  */
2215   machine_mode src_int_mode = int_mode_for_mode (src_mode);
2216   machine_mode int_mode = int_mode_for_mode (mode);
2217   if (src_int_mode == BLKmode || int_mode == BLKmode)
2218     return NULL_RTX;
2219   if (!MODES_TIEABLE_P (src_int_mode, src_mode)
2220       || !MODES_TIEABLE_P (int_mode, mode))
2221     return NULL_RTX;
2222
2223   /* Pun to integer, convert between the integer modes, pun back.  */
2224   rtx bits = gen_lowpart (src_int_mode, src);
2225   bits = convert_modes (int_mode, src_int_mode, bits, true);
2226   return gen_lowpart (mode, bits);
     }
2227 \f
2228 /* Emit insns that add INC to TARGET, leaving the sum in TARGET.  */
2229
2230 void
2231 expand_inc (rtx target, rtx inc)
2232 {
2233   machine_mode target_mode = GET_MODE (target);
2234   rtx sum = expand_binop (target_mode, add_optab, target, inc, target, 0,
2235                           OPTAB_LIB_WIDEN);
2236   /* The sum may have been produced somewhere other than TARGET.  */
2237   if (sum != target)
2238     emit_move_insn (target, sum);
     }
2239
2240 /* Emit insns that subtract DEC from TARGET, leaving the difference in
        TARGET.  */
2241
2242 void
2243 expand_dec (rtx target, rtx dec)
2244 {
2245   machine_mode target_mode = GET_MODE (target);
2246   rtx diff = expand_binop (target_mode, sub_optab, target, dec, target, 0,
2247                            OPTAB_LIB_WIDEN);
2248   /* The difference may have been produced somewhere other than TARGET.  */
2249   if (diff != target)
2250     emit_move_insn (target, diff);
     }
2251 \f
2252 /* Output a shift instruction for expression code CODE,
2253    with SHIFTED being the rtx for the value to shift,
2254    and AMOUNT the rtx for the amount to shift by.
2255    Store the result in the rtx TARGET, if that is convenient.
2256    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2257    Return the rtx for where the value is.
2258    If that cannot be done, abort the compilation unless MAY_FAIL is true,
2259    in which case 0 is returned.  */
2260
2261 static rtx
2262 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2263                 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2264 {
2265   rtx op1, temp = 0;
2266   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2267   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2268   optab lshift_optab = ashl_optab;
2269   optab rshift_arith_optab = ashr_optab;
2270   optab rshift_uns_optab = lshr_optab;
2271   optab lrotate_optab = rotl_optab;
2272   optab rrotate_optab = rotr_optab;
2273   machine_mode op1_mode;
2274   machine_mode scalar_mode = mode;
2275   int attempt;
2276   bool speed = optimize_insn_for_speed_p ();
2277
2278   if (VECTOR_MODE_P (mode))
2279     scalar_mode = GET_MODE_INNER (mode);
2280   op1 = amount;
2281   op1_mode = GET_MODE (op1);
2282
2283   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2284      shift amount is a vector, use the vector/vector shift patterns.  */
2285   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2286     {
2287       lshift_optab = vashl_optab;
2288       rshift_arith_optab = vashr_optab;
2289       rshift_uns_optab = vlshr_optab;
2290       lrotate_optab = vrotl_optab;
2291       rrotate_optab = vrotr_optab;
2292     }
2293
2294   /* Previously detected shift-counts computed by NEGATE_EXPR
2295      and shifted in the other direction; but that does not work
2296      on all machines.  */
2297
2298   if (SHIFT_COUNT_TRUNCATED)
2299     {
2300       if (CONST_INT_P (op1)
2301           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2302               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2303         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2304                        % GET_MODE_BITSIZE (scalar_mode));
2305       else if (GET_CODE (op1) == SUBREG
2306                && subreg_lowpart_p (op1)
2307                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2308                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2309         op1 = SUBREG_REG (op1);
2310     }
2311
2312   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2313      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2314      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2315      amount instead.  */
2316   if (rotate
2317       && CONST_INT_P (op1)
2318       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2319                    GET_MODE_BITSIZE (scalar_mode) - 1))
2320     {
2321       op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2322       left = !left;
2323       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2324     }
2325
2326   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswap.
2327      Query and expand it in MODE rather than HImode so that vectors of 16bit
2328      elements work too.  This does not hold for bigger values: a rotation of
2329      0x01020304 by 16 bits gives 0x03040102, not 0x04030201 (bswapsi).  */
2330   if (rotate
2331       && CONST_INT_P (op1)
2332       && INTVAL (op1) == BITS_PER_UNIT
2333       && GET_MODE_SIZE (scalar_mode) == 2
2334       && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2335     return expand_unop (mode, bswap_optab, shifted, NULL_RTX,
2336                         unsignedp);
2337
2338   if (op1 == const0_rtx)
2339     return shifted;
2340
2341   /* Check whether it's cheaper to implement a left shift by a constant
2342      bit count by a sequence of additions.  */
2343   if (code == LSHIFT_EXPR
2344       && CONST_INT_P (op1)
2345       && INTVAL (op1) > 0
2346       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2347       && INTVAL (op1) < MAX_BITS_PER_WORD
2348       && (shift_cost (speed, mode, INTVAL (op1))
2349           > INTVAL (op1) * add_cost (speed, mode))
2350       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2351     {
2352       int i;
2353       for (i = 0; i < INTVAL (op1); i++)
2354         {
2355           temp = force_reg (mode, shifted);
2356           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2357                                   unsignedp, OPTAB_LIB_WIDEN);
2358         }
2359       return shifted;
2360     }
2361
2362   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2363     {
2364       enum optab_methods methods;
2365       /* Escalate from OPTAB_DIRECT via OPTAB_WIDEN to OPTAB_LIB_WIDEN.  */
2366       if (attempt == 0)
2367         methods = OPTAB_DIRECT;
2368       else if (attempt == 1)
2369         methods = OPTAB_WIDEN;
2370       else
2371         methods = OPTAB_LIB_WIDEN;
2372
2373       if (rotate)
2374         {
2375           /* Widening does not work for rotation.  */
2376           if (methods == OPTAB_WIDEN)
2377             continue;
2378           else if (methods == OPTAB_LIB_WIDEN)
2379             {
2380               /* If we have been unable to open-code this by a rotation,
2381                  do it as the IOR of two shifts.  I.e., to rotate A
2382                  by N bits, compute
2383                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2384                  where C is the bitsize of A.
2385
2386                  It is theoretically possible that the target machine might
2387                  not be able to perform either shift and hence we would
2388                  be making two libcalls rather than just the one for the
2389                  shift (similarly if IOR could not be done).  We will allow
2390                  this extremely unlikely lossage to avoid complicating the
2391                  code below.  */
2392
2393               rtx subtarget = target == shifted ? 0 : target;
2394               rtx new_amount, other_amount;
2395               rtx temp1;
2396
2397               new_amount = op1;
2398               if (op1 == const0_rtx)
2399                 return shifted;
2400               else if (CONST_INT_P (op1))
2401                 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2402                                         - INTVAL (op1));
2403               else
2404                 {
2405                   other_amount
2406                     = simplify_gen_unary (NEG, GET_MODE (op1),
2407                                           op1, GET_MODE (op1));
2408                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2409                   other_amount
2410                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2411                                            gen_int_mode (mask, GET_MODE (op1)));
2412                 }
2413
2414               shifted = force_reg (mode, shifted);
2415
2416               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2417                                      mode, shifted, new_amount, 0, 1);
2418               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2419                                       mode, shifted, other_amount,
2420                                       subtarget, 1);
2421               return expand_binop (mode, ior_optab, temp, temp1, target,
2422                                    unsignedp, methods);
2423             }
2424
2425           temp = expand_binop (mode,
2426                                left ? lrotate_optab : rrotate_optab,
2427                                shifted, op1, target, unsignedp, methods);
2428         }
2429       else if (unsignedp)
2430         temp = expand_binop (mode,
2431                              left ? lshift_optab : rshift_uns_optab,
2432                              shifted, op1, target, unsignedp, methods);
2433
2434       /* Do arithmetic shifts.
2435          Also, if we are going to widen the operand, we can just as well
2436          use an arithmetic right-shift instead of a logical one.  */
2437       if (temp == 0 && ! rotate
2438           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2439         {
2440           enum optab_methods methods1 = methods;
2441
2442           /* If trying to widen a log shift to an arithmetic shift,
2443              don't accept an arithmetic shift of the same size.  */
2444           if (unsignedp)
2445             methods1 = OPTAB_MUST_WIDEN;
2446
2447           /* Arithmetic shift */
2448
2449           temp = expand_binop (mode,
2450                                left ? lshift_optab : rshift_arith_optab,
2451                                shifted, op1, target, unsignedp, methods1);
2452         }
2453
2454       /* We used to try extzv here for logical right shifts, but that was
2455          only useful for one machine, the VAX, and caused poor code
2456          generation there for lshrdi3, so the code was deleted and a
2457          define_expand for lshrsi3 was added to vax.md.  */
2458     }
2459
2460   gcc_assert (temp != NULL_RTX || may_fail);
2461   return temp;
2462 }
2463
2464 /* Output a shift instruction for expression code CODE,
2465    with SHIFTED being the rtx for the value to shift,
2466    and AMOUNT the amount to shift by.
2467    Store the result in the rtx TARGET, if that is convenient.
2468    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2469    Return the rtx for where the value is.  */
2470
2471 rtx
2472 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2473               int amount, rtx target, int unsignedp)
2474 {
2475   rtx count = GEN_INT (amount);   /* Constant shift count as an rtx.  */
2476   return expand_shift_1 (code, mode, shifted, count, target, unsignedp);
2477 }
2478
2479 /* Likewise, but return 0 if that cannot be done.  */
2480
2481 static rtx
2482 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2483                     int amount, rtx target, int unsignedp)
2484 {
2485   rtx count = GEN_INT (amount);   /* Constant shift count as an rtx.  */
2486   return expand_shift_1 (code, mode, shifted, count, target, unsignedp, true);
2487 }
2488
2489 /* Output a shift instruction for expression code CODE,
2490    with SHIFTED being the rtx for the value to shift,
2491    and AMOUNT the tree for the amount to shift by.
2492    Store the result in the rtx TARGET, if that is convenient.
2493    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2494    Return the rtx for where the value is.  */
2495
2496 rtx
2497 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2498                        tree amount, rtx target, int unsignedp)
2499 {
2500   rtx count = expand_normal (amount);   /* Expand the AMOUNT tree to rtl.  */
2501   return expand_shift_1 (code, mode, shifted, count, target, unsignedp);
2502 }
2503
2504 \f
2505 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2506                         const struct mult_cost *, machine_mode mode);
2507 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2508                               const struct algorithm *, enum mult_variant);
2509 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2510 static rtx extract_high_half (machine_mode, rtx);
2511 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2512 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2513                                        int, int);
2514 /* Compute the best algorithm for multiplying by T and store it in *ALG_OUT.
2515    The algorithm must cost less than COST_LIMIT.
2516    If ALG_OUT->cost >= COST_LIMIT, no algorithm was found and all
2517    other fields of *ALG_OUT are undefined.
2518    MODE is the machine mode of the multiplication.  */
2519
2520 static void
2521 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2522             const struct mult_cost *cost_limit, machine_mode mode)
2523 {
2524   int m;
2525   struct algorithm *alg_in, *best_alg;
2526   struct mult_cost best_cost;
2527   struct mult_cost new_limit;
2528   int op_cost, op_latency;
2529   unsigned HOST_WIDE_INT orig_t = t;
2530   unsigned HOST_WIDE_INT q;
2531   int maxm, hash_index;
2532   bool cache_hit = false;
2533   enum alg_code cache_alg = alg_zero;
2534   bool speed = optimize_insn_for_speed_p ();
2535   machine_mode imode;
2536   struct alg_hash_entry *entry_ptr;
2537
2538   /* Indicate that no algorithm is yet found.  If no algorithm
2539      is found, this value will be returned and indicate failure.  */
2540   alg_out->cost.cost = cost_limit->cost + 1;
2541   alg_out->cost.latency = cost_limit->latency + 1;
2542
2543   if (cost_limit->cost < 0
2544       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2545     return;
2546
2547   /* Be prepared for vector modes.  */
2548   imode = GET_MODE_INNER (mode);
2549   /* Shift counts used below are limited to values smaller than MAXM.  */
2550   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2551
2552   /* Restrict the bits of "t" to the multiplication's mode.  */
2553   t &= GET_MODE_MASK (imode);
2554
2555   /* t == 1 can be done in zero cost.  */
2556   if (t == 1)
2557     {
2558       alg_out->ops = 1;
2559       alg_out->cost.cost = 0;
2560       alg_out->cost.latency = 0;
2561       alg_out->op[0] = alg_m;
2562       return;
2563     }
2564
2565   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2566      fail now.  */
2567   if (t == 0)
2568     {
2569       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2570         return;
2571       else
2572         {
2573           alg_out->ops = 1;
2574           alg_out->cost.cost = zero_cost (speed);
2575           alg_out->cost.latency = zero_cost (speed);
2576           alg_out->op[0] = alg_zero;
2577           return;
2578         }
2579     }
2580
2581   /* We'll be needing a couple extra algorithm structures now.  */
2582
2583   alg_in = XALLOCA (struct algorithm);
2584   best_alg = XALLOCA (struct algorithm);
2585   best_cost = *cost_limit;
2586
2587   /* Compute the hash index.  */
2588   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2589
2590   /* See if we already know what to do for T.  */
2591   entry_ptr = alg_hash_entry_ptr (hash_index);
2592   if (entry_ptr->t == t
2593       && entry_ptr->mode == mode
2594       && entry_ptr->speed == speed
2595       && entry_ptr->alg != alg_unknown)
2596     {
2597       cache_alg = entry_ptr->alg;
2598
2599       if (cache_alg == alg_impossible)
2600         {
2601           /* The cache tells us that it's impossible to synthesize
2602              multiplication by T within entry_ptr->cost.  */
2603           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2604             /* COST_LIMIT is at least as restrictive as the one
2605                recorded in the hash table, in which case we have no
2606                hope of synthesizing a multiplication.  Just
2607                return.  */
2608             return;
2609
2610           /* If we get here, COST_LIMIT is less restrictive than the
2611              one recorded in the hash table, so we may be able to
2612              synthesize a multiplication.  Proceed as if we didn't
2613              have the cache entry.  */
2614         }
2615       else
2616         {
2617           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2618             /* The cached algorithm shows that this multiplication
2619                requires more cost than COST_LIMIT.  Just return.  This
2620                way, we don't clobber this cache entry with
2621                alg_impossible but retain useful information.  */
2622             return;
2623
2624           cache_hit = true;
2625
2626           switch (cache_alg)
2627             {
2628             case alg_shift:
2629               goto do_alg_shift;
2630
2631             case alg_add_t_m2:
2632             case alg_sub_t_m2:
2633               goto do_alg_addsub_t_m2;
2634
2635             case alg_add_factor:
2636             case alg_sub_factor:
2637               goto do_alg_addsub_factor;
2638
2639             case alg_add_t2_m:
2640               goto do_alg_add_t2_m;
2641
2642             case alg_sub_t2_m:
2643               goto do_alg_sub_t2_m;
2644
2645             default:
2646               gcc_unreachable ();
2647             }
2648         }
2649     }
2650
2651   /* If we have a group of zero bits at the low-order part of T, try
2652      multiplying by the remaining bits and then doing a shift.  */
2653
2654   if ((t & 1) == 0)
2655     {
2656     do_alg_shift:
2657       m = ctz_or_zero (t); /* m = number of low zero bits */
2658       if (m < maxm)
2659         {
2660           q = t >> m;
2661           /* The function expand_shift will choose between a shift and
2662              a sequence of additions, so the observed cost is given as
2663              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2664           op_cost = m * add_cost (speed, mode);
2665           if (shift_cost (speed, mode, m) < op_cost)
2666             op_cost = shift_cost (speed, mode, m);
2667           new_limit.cost = best_cost.cost - op_cost;
2668           new_limit.latency = best_cost.latency - op_cost;
2669           synth_mult (alg_in, q, &new_limit, mode);
2670
2671           alg_in->cost.cost += op_cost;
2672           alg_in->cost.latency += op_cost;
2673           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2674             {
2675               best_cost = alg_in->cost;
2676               std::swap (alg_in, best_alg);
2677               best_alg->log[best_alg->ops] = m;
2678               best_alg->op[best_alg->ops] = alg_shift;
2679             }
2680
2681           /* See if treating ORIG_T as a signed number yields a better
2682              sequence.  Try this sequence only for a negative ORIG_T
2683              as it would be useless for a non-negative ORIG_T.  */
2684           if ((HOST_WIDE_INT) orig_t < 0)
2685             {
2686               /* Shift ORIG_T as follows because a right shift of a
2687                  negative-valued signed type is implementation
2688                  defined.  */
2689               q = ~(~orig_t >> m);
2690               /* The function expand_shift will choose between a shift
2691                  and a sequence of additions, so the observed cost is
2692                  given as MIN (m * add_cost(speed, mode),
2693                  shift_cost(speed, mode, m)).  */
2694               op_cost = m * add_cost (speed, mode);
2695               if (shift_cost (speed, mode, m) < op_cost)
2696                 op_cost = shift_cost (speed, mode, m);
2697               new_limit.cost = best_cost.cost - op_cost;
2698               new_limit.latency = best_cost.latency - op_cost;
2699               synth_mult (alg_in, q, &new_limit, mode);
2700
2701               alg_in->cost.cost += op_cost;
2702               alg_in->cost.latency += op_cost;
2703               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2704                 {
2705                   best_cost = alg_in->cost;
2706                   std::swap (alg_in, best_alg);
2707                   best_alg->log[best_alg->ops] = m;
2708                   best_alg->op[best_alg->ops] = alg_shift;
2709                 }
2710             }
2711         }
2712       if (cache_hit)
2713         goto done;
2714     }
2715
2716   /* If we have an odd number, add or subtract one.  */
2717   if ((t & 1) != 0)
2718     {
2719       unsigned HOST_WIDE_INT w;
2720
2721     do_alg_addsub_t_m2:
2722       for (w = 1; (w & t) != 0; w <<= 1)
2723         ;
2724       /* If T was -1, then W will be zero after the loop.  This is another
2725          case where T ends with ...111.  Handling this with (T + 1) and
2726          subtract 1 produces slightly better code and results in algorithm
2727          selection much faster than treating it like the ...0111 case
2728          below.  */
2729       if (w == 0
2730           || (w > 2
2731               /* Reject the case where t is 3.
2732                  Thus we prefer addition in that case.  */
2733               && t != 3))
2734         {
2735           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2736
2737           op_cost = add_cost (speed, mode);
2738           new_limit.cost = best_cost.cost - op_cost;
2739           new_limit.latency = best_cost.latency - op_cost;
2740           synth_mult (alg_in, t + 1, &new_limit, mode);
2741
2742           alg_in->cost.cost += op_cost;
2743           alg_in->cost.latency += op_cost;
2744           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2745             {
2746               best_cost = alg_in->cost;
2747               std::swap (alg_in, best_alg);
2748               best_alg->log[best_alg->ops] = 0;
2749               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2750             }
2751         }
2752       else
2753         {
2754           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2755
2756           op_cost = add_cost (speed, mode);
2757           new_limit.cost = best_cost.cost - op_cost;
2758           new_limit.latency = best_cost.latency - op_cost;
2759           synth_mult (alg_in, t - 1, &new_limit, mode);
2760
2761           alg_in->cost.cost += op_cost;
2762           alg_in->cost.latency += op_cost;
2763           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2764             {
2765               best_cost = alg_in->cost;
2766               std::swap (alg_in, best_alg);
2767               best_alg->log[best_alg->ops] = 0;
2768               best_alg->op[best_alg->ops] = alg_add_t_m2;
2769             }
2770         }
2771
2772       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2773          quickly with a - a * n for some appropriate constant n.  */
2774       m = exact_log2 (-orig_t + 1);
2775       if (m >= 0 && m < maxm)
2776         {
2777           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2778           /* If the target has a cheap shift-and-subtract insn use
2779              that in preference to a shift insn followed by a sub insn.
2780              Assume that the shift-and-sub is "atomic" with a latency
2781              equal to its cost, otherwise assume that on superscalar
2782              hardware the shift may be executed concurrently with the
2783              earlier steps in the algorithm.  */
2784           if (shiftsub1_cost (speed, mode, m) <= op_cost)
2785             {
2786               op_cost = shiftsub1_cost (speed, mode, m);
2787               op_latency = op_cost;
2788             }
2789           else
2790             op_latency = add_cost (speed, mode);
2791
2792           new_limit.cost = best_cost.cost - op_cost;
2793           new_limit.latency = best_cost.latency - op_latency;
2794           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2795                       &new_limit, mode);
2796
2797           alg_in->cost.cost += op_cost;
2798           alg_in->cost.latency += op_latency;
2799           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2800             {
2801               best_cost = alg_in->cost;
2802               std::swap (alg_in, best_alg);
2803               best_alg->log[best_alg->ops] = m;
2804               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2805             }
2806         }
2807       /* On a cache hit only this section is re-evaluated.  */
2808       if (cache_hit)
2809         goto done;
2810     }
2811
2812   /* Look for factors of t of the form
2813      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2814      If we find such a factor, we can multiply by t using an algorithm that
2815      multiplies by q, shift the result by m and add/subtract it to itself.
2816
2817      We search for large factors first and loop down, even if large factors
2818      are less probable than small; if we find a large factor we will find a
2819      good sequence quickly, and therefore be able to prune (by decreasing
2820      COST_LIMIT) the search.  */
2821
2822  do_alg_addsub_factor:
2823   for (m = floor_log2 (t - 1); m >= 2; m--)
2824     {
2825       unsigned HOST_WIDE_INT d;
2826
2827       d = (HOST_WIDE_INT_1U << m) + 1;  /* d = 2**m + 1 */
2828       if (t % d == 0 && t > d && m < maxm
2829           && (!cache_hit || cache_alg == alg_add_factor))
2830         {
2831           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2832           if (shiftadd_cost (speed, mode, m) <= op_cost)
2833             op_cost = shiftadd_cost (speed, mode, m);
2834
2835           op_latency = op_cost;
2836
2837
2838           new_limit.cost = best_cost.cost - op_cost;
2839           new_limit.latency = best_cost.latency - op_latency;
2840           synth_mult (alg_in, t / d, &new_limit, mode);
2841
2842           alg_in->cost.cost += op_cost;
2843           alg_in->cost.latency += op_latency;
2844           if (alg_in->cost.latency < op_cost)
2845             alg_in->cost.latency = op_cost;
2846           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2847             {
2848               best_cost = alg_in->cost;
2849               std::swap (alg_in, best_alg);
2850               best_alg->log[best_alg->ops] = m;
2851               best_alg->op[best_alg->ops] = alg_add_factor;
2852             }
2853           /* Other factors will have been taken care of in the recursion.  */
2854           break;
2855         }
2856
2857       d = (HOST_WIDE_INT_1U << m) - 1;  /* d = 2**m - 1 */
2858       if (t % d == 0 && t > d && m < maxm
2859           && (!cache_hit || cache_alg == alg_sub_factor))
2860         {
2861           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2862           if (shiftsub0_cost (speed, mode, m) <= op_cost)
2863             op_cost = shiftsub0_cost (speed, mode, m);
2864
2865           op_latency = op_cost;
2866
2867           new_limit.cost = best_cost.cost - op_cost;
2868           new_limit.latency = best_cost.latency - op_latency;
2869           synth_mult (alg_in, t / d, &new_limit, mode);
2870
2871           alg_in->cost.cost += op_cost;
2872           alg_in->cost.latency += op_latency;
2873           if (alg_in->cost.latency < op_cost)
2874             alg_in->cost.latency = op_cost;
2875           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2876             {
2877               best_cost = alg_in->cost;
2878               std::swap (alg_in, best_alg);
2879               best_alg->log[best_alg->ops] = m;
2880               best_alg->op[best_alg->ops] = alg_sub_factor;
2881             }
2882           break;
2883         }
2884     }
2885   if (cache_hit)
2886     goto done;
2887
2888   /* Try shift-and-add (load effective address) instructions,
2889      i.e. do a*3, a*5, a*9.  */
2890   if ((t & 1) != 0)
2891     {
2892     do_alg_add_t2_m:
2893       q = t - 1;
2894       m = ctz_hwi (q);
2895       if (q && m < maxm)
2896         {
2897           op_cost = shiftadd_cost (speed, mode, m);
2898           new_limit.cost = best_cost.cost - op_cost;
2899           new_limit.latency = best_cost.latency - op_cost;
2900           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2901
2902           alg_in->cost.cost += op_cost;
2903           alg_in->cost.latency += op_cost;
2904           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2905             {
2906               best_cost = alg_in->cost;
2907               std::swap (alg_in, best_alg);
2908               best_alg->log[best_alg->ops] = m;
2909               best_alg->op[best_alg->ops] = alg_add_t2_m;
2910             }
2911         }
2912       if (cache_hit)
2913         goto done;
2914
2915     do_alg_sub_t2_m:
2916       q = t + 1;
2917       m = ctz_hwi (q);
2918       if (q && m < maxm)
2919         {
2920           op_cost = shiftsub0_cost (speed, mode, m);
2921           new_limit.cost = best_cost.cost - op_cost;
2922           new_limit.latency = best_cost.latency - op_cost;
2923           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2924
2925           alg_in->cost.cost += op_cost;
2926           alg_in->cost.latency += op_cost;
2927           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2928             {
2929               best_cost = alg_in->cost;
2930               std::swap (alg_in, best_alg);
2931               best_alg->log[best_alg->ops] = m;
2932               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2933             }
2934         }
2935       if (cache_hit)
2936         goto done;
2937     }
2938
2939  done:
2940   /* If best_cost has not decreased, we have not found any algorithm.  */
2941   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2942     {
2943       /* We failed to find an algorithm.  Record alg_impossible for
2944          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2945          we are asked to find an algorithm for T within the same or
2946          lower COST_LIMIT, we can immediately return to the
2947          caller.  */
2948       entry_ptr->t = t;
2949       entry_ptr->mode = mode;
2950       entry_ptr->speed = speed;
2951       entry_ptr->alg = alg_impossible;
2952       entry_ptr->cost = *cost_limit;
2953       return;
2954     }
2955
2956   /* Cache the result.  */
2957   if (!cache_hit)
2958     {
2959       entry_ptr->t = t;
2960       entry_ptr->mode = mode;
2961       entry_ptr->speed = speed;
2962       entry_ptr->alg = best_alg->op[best_alg->ops];
2963       entry_ptr->cost.cost = best_cost.cost;
2964       entry_ptr->cost.latency = best_cost.latency;
2965     }
2966
2967   /* If we are getting a too long sequence for `struct algorithm'
2968      to record, make this search fail.  */
2969   if (best_alg->ops == MAX_BITS_PER_WORD)
2970     return;
2971
2972   /* Copy the algorithm from temporary space to the space at alg_out.
2973      We avoid using structure assignment because the majority of
2974      best_alg is normally undefined, and this is a critical function.  */
2975   alg_out->ops = best_alg->ops + 1;
2976   alg_out->cost = best_cost;
2977   memcpy (alg_out->op, best_alg->op,
2978           alg_out->ops * sizeof *alg_out->op);
2979   memcpy (alg_out->log, best_alg->log,
2980           alg_out->ops * sizeof *alg_out->log);
2981 }
2982 \f
2983 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2984    Try three variations:
2985
2986        - a shift/add sequence based on VAL itself
2987        - a shift/add sequence based on -VAL, followed by a negation
2988        - a shift/add sequence based on VAL - 1, followed by an addition.
2989
2990    Return true if the cheapest of these costs less than MULT_COST,
2991    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2992
2993 bool
2994 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2995                      struct algorithm *alg, enum mult_variant *variant,
2996                      int mult_cost)
2997 {
2998   struct algorithm alg2;
2999   struct mult_cost limit;
3000   int op_cost;
3001   bool speed = optimize_insn_for_speed_p ();
3002
3003   /* Fail quickly for impossible bounds.  */
3004   if (mult_cost < 0)
3005     return false;
3006
3007   /* Ensure that mult_cost provides a reasonable upper bound.
3008      Any constant multiplication can be performed with less
3009      than 2 * bits additions.  */
3010   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3011   if (mult_cost > op_cost)
3012     mult_cost = op_cost;
3013
3014   *variant = basic_variant;
3015   limit.cost = mult_cost;
3016   limit.latency = mult_cost;
3017   synth_mult (alg, val, &limit, mode);
3018
3019   /* This works only if the inverted value actually fits in an
3020      `unsigned int'.  */
3021   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3022     {
3023       op_cost = neg_cost (speed, mode);
3024       if (MULT_COST_LESS (&alg->cost, mult_cost))
3025         {
3026           limit.cost = alg->cost.cost - op_cost;
3027           limit.latency = alg->cost.latency - op_cost;
3028         }
3029       else
3030         {
3031           limit.cost = mult_cost - op_cost;
3032           limit.latency = mult_cost - op_cost;
3033         }
3034       /* Negate as unsigned: -VAL would overflow for the most negative VAL.  */
3035       synth_mult (&alg2, -(unsigned HOST_WIDE_INT) val, &limit, mode);
3036       alg2.cost.cost += op_cost;
3037       alg2.cost.latency += op_cost;
3038       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3039         *alg = alg2, *variant = negate_variant;
3040     }
3041
3042   /* This proves very useful for division-by-constant.  */
3043   op_cost = add_cost (speed, mode);
3044   if (MULT_COST_LESS (&alg->cost, mult_cost))
3045     {
3046       limit.cost = alg->cost.cost - op_cost;
3047       limit.latency = alg->cost.latency - op_cost;
3048     }
3049   else
3050     {
3051       limit.cost = mult_cost - op_cost;
3052       limit.latency = mult_cost - op_cost;
3053     }
3054   /* Likewise subtract as unsigned so that VAL - 1 cannot overflow.  */
3055   synth_mult (&alg2, (unsigned HOST_WIDE_INT) val - 1, &limit, mode);
3056   alg2.cost.cost += op_cost;
3057   alg2.cost.latency += op_cost;
3058   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3059     *alg = alg2, *variant = add_variant;
3060
3061   return MULT_COST_LESS (&alg->cost, mult_cost);
3062 }
3063
3064 /* A subroutine of expand_mult, used for constant multiplications.
3065    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3066    convenient.  Use the shift/add sequence described by ALG and apply
3067    the final fixup specified by VARIANT.  */
3068
3069 static rtx
3070 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3071                    rtx target, const struct algorithm *alg,
3072                    enum mult_variant variant)
3073 {
3074   unsigned HOST_WIDE_INT val_so_far;
3075   rtx_insn *insn;
3076   rtx accum, tem;
3077   int opno;
3078   machine_mode nmode;
3079
3080   /* Avoid referencing memory over and over and invalid sharing
3081      on SUBREGs.  */
3082   op0 = force_reg (mode, op0);
3083
3084   /* ACCUM starts out either as OP0 or as a zero, depending on
3085      the first operation.  */
3086
3087   if (alg->op[0] == alg_zero)
3088     {
3089       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3090       val_so_far = 0;
3091     }
3092   else if (alg->op[0] == alg_m)
3093     {
3094       accum = copy_to_mode_reg (mode, op0);
3095       val_so_far = 1;
3096     }
3097   else
3098     gcc_unreachable ();
3099
3100   for (opno = 1; opno < alg->ops; opno++)
3101     {
3102       int log = alg->log[opno];
3103       rtx shift_subtarget = optimize ? 0 : accum;
3104       rtx add_target
3105         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3106            && !optimize)
3107           ? target : 0;
3108       rtx accum_target = optimize ? 0 : accum;
3109       rtx accum_inner;
3110
3111       switch (alg->op[opno])
3112         {
3113         case alg_shift:
3114           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3115           /* REG_EQUAL note will be attached to the following insn.  */
3116           emit_move_insn (accum, tem);
3117           val_so_far <<= log;
3118           break;
3119
3120         case alg_add_t_m2:
3121           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3122           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3123                                  add_target ? add_target : accum_target);
3124           val_so_far += HOST_WIDE_INT_1U << log;
3125           break;
3126
3127         case alg_sub_t_m2:
3128           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3129           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3130                                  add_target ? add_target : accum_target);
3131           val_so_far -= HOST_WIDE_INT_1U << log;
3132           break;
3133
3134         case alg_add_t2_m:
3135           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3136                                 log, shift_subtarget, 0);
3137           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3138                                  add_target ? add_target : accum_target);
3139           val_so_far = (val_so_far << log) + 1;
3140           break;
3141
3142         case alg_sub_t2_m:
3143           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3144                                 log, shift_subtarget, 0);
3145           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3146                                  add_target ? add_target : accum_target);
3147           val_so_far = (val_so_far << log) - 1;
3148           break;
3149
3150         case alg_add_factor:
3151           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3152           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3153                                  add_target ? add_target : accum_target);
3154           val_so_far += val_so_far << log;
3155           break;
3156
3157         case alg_sub_factor:
3158           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3159           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3160                                  (add_target
3161                                   ? add_target : (optimize ? 0 : tem)));
3162           val_so_far = (val_so_far << log) - val_so_far;
3163           break;
3164
3165         default:
3166           gcc_unreachable ();
3167         }
3168
3169       if (SCALAR_INT_MODE_P (mode))
3170         {
3171           /* Write a REG_EQUAL note on the last insn so that we can cse
3172              multiplication sequences.  Note that if ACCUM is a SUBREG,
3173              we've set the inner register and must properly indicate that.  */
3174           tem = op0, nmode = mode;
3175           accum_inner = accum;
3176           if (GET_CODE (accum) == SUBREG)
3177             {
3178               accum_inner = SUBREG_REG (accum);
3179               nmode = GET_MODE (accum_inner);
3180               tem = gen_lowpart (nmode, op0);
3181             }
3182
3183           insn = get_last_insn ();
3184           set_dst_reg_note (insn, REG_EQUAL,
3185                             gen_rtx_MULT (nmode, tem,
3186                                           gen_int_mode (val_so_far, nmode)),
3187                             accum_inner);
3188         }
3189     }
3190
3191   if (variant == negate_variant)
3192     {
3193       val_so_far = -val_so_far;
3194       accum = expand_unop (mode, neg_optab, accum, target, 0);
3195     }
3196   else if (variant == add_variant)
3197     {
3198       val_so_far = val_so_far + 1;
3199       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3200     }
3201
3202   /* Compare only the bits of val and val_so_far that are significant
3203      in the result mode, to avoid sign-/zero-extension confusion.  */
3204   nmode = GET_MODE_INNER (mode);
3205   val &= GET_MODE_MASK (nmode);
3206   val_so_far &= GET_MODE_MASK (nmode);
3207   gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3208
3209   return accum;
3210 }
3211
3212 /* Perform a multiplication and return an rtx for the result.
3213    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3214    TARGET is a suggestion for where to store the result (an rtx).
3215
3216    We check specially for a constant integer as OP1.
3217    If you want this check for OP0 as well, then before calling
3218    you should swap the two operands if OP0 would be constant.  */
3219
3220 rtx
3221 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3222              int unsignedp)
3223 {
3224   enum mult_variant variant;
3225   struct algorithm algorithm;
3226   rtx scalar_op1;
3227   int max_cost;
3228   bool speed = optimize_insn_for_speed_p ();
3229   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3230
3231   if (CONSTANT_P (op0))
3232     std::swap (op0, op1);
3233
3234   /* For vectors, there are several simplifications that can be made if
3235      all elements of the vector constant are identical.  */
3236   scalar_op1 = unwrap_const_vec_duplicate (op1);
3237
3238   if (INTEGRAL_MODE_P (mode))
3239     {
3240       rtx fake_reg;
3241       HOST_WIDE_INT coeff;
3242       bool is_neg;
3243       int mode_bitsize;
3244
3245       if (op1 == CONST0_RTX (mode))
3246         return op1;
3247       if (op1 == CONST1_RTX (mode))
3248         return op0;
3249       if (op1 == CONSTM1_RTX (mode))
3250         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3251                             op0, target, 0);
3252
3253       if (do_trapv)
3254         goto skip_synth;
3255
3256       /* If mode is integer vector mode, check if the backend supports
3257          vector lshift (by scalar or vector) at all.  If not, we can't use
3258          synthetized multiply.  */
3259       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3260           && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3261           && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3262         goto skip_synth;
3263
3264       /* These are the operations that are potentially turned into
3265          a sequence of shifts and additions.  */
3266       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3267
3268       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3269          less than or equal in size to `unsigned int' this doesn't matter.
3270          If the mode is larger than `unsigned int', then synth_mult works
3271          only if the constant value exactly fits in an `unsigned int' without
3272          any truncation.  This means that multiplying by negative values does
3273          not work; results are off by 2^32 on a 32 bit machine.  */
3274       if (CONST_INT_P (scalar_op1))
3275         {
3276           coeff = INTVAL (scalar_op1);
3277           is_neg = coeff < 0;
3278         }
3279 #if TARGET_SUPPORTS_WIDE_INT
3280       else if (CONST_WIDE_INT_P (scalar_op1))
3281 #else
3282       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3283 #endif
3284         {
3285           int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3286           /* Perfect power of 2 (other than 1, which is handled above).  */
3287           if (shift > 0)
3288             return expand_shift (LSHIFT_EXPR, mode, op0,
3289                                  shift, target, unsignedp);
3290           else
3291             goto skip_synth;
3292         }
3293       else
3294         goto skip_synth;
3295
3296       /* We used to test optimize here, on the grounds that it's better to
3297          produce a smaller program when -O is not used.  But this causes
3298          such a terrible slowdown sometimes that it seems better to always
3299          use synth_mult.  */
3300
3301       /* Special case powers of two.  */
3302       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3303           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3304         return expand_shift (LSHIFT_EXPR, mode, op0,
3305                              floor_log2 (coeff), target, unsignedp);
3306
3307       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3308
3309       /* Attempt to handle multiplication of DImode values by negative
3310          coefficients, by performing the multiplication by a positive
3311          multiplier and then inverting the result.  */
3312       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3313         {
3314           /* Its safe to use -coeff even for INT_MIN, as the
3315              result is interpreted as an unsigned coefficient.
3316              Exclude cost of op0 from max_cost to match the cost
3317              calculation of the synth_mult.  */
3318           coeff = -(unsigned HOST_WIDE_INT) coeff;
3319           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3320                                     mode, speed)
3321                       - neg_cost (speed, mode));
3322           if (max_cost <= 0)
3323             goto skip_synth;
3324
3325           /* Special case powers of two.  */
3326           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3327             {
3328               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3329                                        floor_log2 (coeff), target, unsignedp);
3330               return expand_unop (mode, neg_optab, temp, target, 0);
3331             }
3332
3333           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3334                                    max_cost))
3335             {
3336               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3337                                             &algorithm, variant);
3338               return expand_unop (mode, neg_optab, temp, target, 0);
3339             }
3340           goto skip_synth;
3341         }
3342
3343       /* Exclude cost of op0 from max_cost to match the cost
3344          calculation of the synth_mult.  */
3345       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3346       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3347         return expand_mult_const (mode, op0, coeff, target,
3348                                   &algorithm, variant);
3349     }
3350  skip_synth:
3351
3352   /* Expand x*2.0 as x+x.  */
3353   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3354       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3355     {
3356       op0 = force_reg (GET_MODE (op0), op0);
3357       return expand_binop (mode, add_optab, op0, op0,
3358                            target, unsignedp, OPTAB_LIB_WIDEN);
3359     }
3360
3361   /* This used to use umul_optab if unsigned, but for non-widening multiply
3362      there is no difference between signed and unsigned.  */
3363   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3364                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3365   gcc_assert (op0);
3366   return op0;
3367 }
3368
3369 /* Return a cost estimate for multiplying a register by the given
3370    COEFFicient in the given MODE and SPEED.  */
3371
3372 int
3373 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3374 {
3375   int max_cost;
3376   struct algorithm algorithm;
3377   enum mult_variant variant;
3378
3379   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3380   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3381                            mode, speed);
3382   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3383     return algorithm.cost.cost;
3384   else
3385     return max_cost;
3386 }
3387
3388 /* Perform a widening multiplication and return an rtx for the result.
3389    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3390    TARGET is a suggestion for where to store the result (an rtx).
3391    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3392    or smul_widen_optab.
3393
3394    We check specially for a constant integer as OP1, comparing the
3395    cost of a widening multiply against the cost of a sequence of shifts
3396    and adds.  */
3397
3398 rtx
3399 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3400                       int unsignedp, optab this_optab)
3401 {
3402   bool speed = optimize_insn_for_speed_p ();
3403   rtx cop1;
3404
3405   if (CONST_INT_P (op1)
3406       && GET_MODE (op0) != VOIDmode
3407       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3408                                 this_optab == umul_widen_optab))
3409       && CONST_INT_P (cop1)
3410       && (INTVAL (cop1) >= 0
3411           || HWI_COMPUTABLE_MODE_P (mode)))
3412     {
3413       HOST_WIDE_INT coeff = INTVAL (cop1);
3414       int max_cost;
3415       enum mult_variant variant;
3416       struct algorithm algorithm;
3417
3418       if (coeff == 0)
3419         return CONST0_RTX (mode);
3420
3421       /* Special case powers of two.  */
3422       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3423         {
3424           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3425           return expand_shift (LSHIFT_EXPR, mode, op0,
3426                                floor_log2 (coeff), target, unsignedp);
3427         }
3428
3429       /* Exclude cost of op0 from max_cost to match the cost
3430          calculation of the synth_mult.  */
3431       max_cost = mul_widen_cost (speed, mode);
3432       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3433                                max_cost))
3434         {
3435           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3436           return expand_mult_const (mode, op0, coeff, target,
3437                                     &algorithm, variant);
3438         }
3439     }
3440   return expand_binop (mode, this_optab, op0, op1, target,
3441                        unsignedp, OPTAB_LIB_WIDEN);
3442 }
3443 \f
3444 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3445    replace division by D, and put the least significant N bits of the result
3446    in *MULTIPLIER_PTR and return the most significant bit.
3447
3448    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3449    needed precision is in PRECISION (should be <= N).
3450
3451    PRECISION should be as small as possible so this function can choose
3452    multiplier more freely.
3453
3454    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3455    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3456
3457    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3458    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3459
3460 unsigned HOST_WIDE_INT
3461 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3462                    unsigned HOST_WIDE_INT *multiplier_ptr,
3463                    int *post_shift_ptr, int *lgup_ptr)
3464 {
3465   int lgup, post_shift;
3466   int pow, pow2;
3467
3468   /* lgup = ceil(log2(divisor)); */
3469   lgup = ceil_log2 (d);
3470
3471   gcc_assert (lgup <= n);
3472
3473   pow = n + lgup;
3474   pow2 = n + lgup - precision;
3475
3476   /* mlow = 2^(N + lgup)/d */
3477   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3478   wide_int mlow = wi::udiv_trunc (val, d);
3479
3480   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3481   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3482   wide_int mhigh = wi::udiv_trunc (val, d);
3483
3484   /* If precision == N, then mlow, mhigh exceed 2^N
3485      (but they do not exceed 2^(N+1)).  */
3486
3487   /* Reduce to lowest terms.  */
3488   for (post_shift = lgup; post_shift > 0; post_shift--)
3489     {
3490       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3491                                                        HOST_BITS_PER_WIDE_INT);
3492       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3493                                                        HOST_BITS_PER_WIDE_INT);
3494       if (ml_lo >= mh_lo)
3495         break;
3496
3497       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3498       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3499     }
3500
3501   *post_shift_ptr = post_shift;
3502   *lgup_ptr = lgup;
3503   if (n < HOST_BITS_PER_WIDE_INT)
3504     {
3505       unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3506       *multiplier_ptr = mhigh.to_uhwi () & mask;
3507       return mhigh.to_uhwi () >= mask;
3508     }
3509   else
3510     {
3511       *multiplier_ptr = mhigh.to_uhwi ();
3512       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3513     }
3514 }
3515
3516 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3517    congruent to 1 (mod 2**N).  */
3518
3519 static unsigned HOST_WIDE_INT
3520 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3521 {
3522   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3523
3524   /* The algorithm notes that the choice y = x satisfies
3525      x*y == 1 mod 2^3, since x is assumed odd.
3526      Each iteration doubles the number of bits of significance in y.  */
3527
3528   unsigned HOST_WIDE_INT mask;
3529   unsigned HOST_WIDE_INT y = x;
3530   int nbit = 3;
3531
3532   mask = (n == HOST_BITS_PER_WIDE_INT
3533           ? HOST_WIDE_INT_M1U
3534           : (HOST_WIDE_INT_1U << n) - 1);
3535
3536   while (nbit < n)
3537     {
3538       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3539       nbit *= 2;
3540     }
3541   return y;
3542 }
3543
3544 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3545    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3546    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3547    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3548    become signed.
3549
3550    The result is put in TARGET if that is convenient.
3551
3552    MODE is the mode of operation.  */
3553
3554 rtx
3555 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3556                              rtx op1, rtx target, int unsignedp)
3557 {
3558   rtx tem;
3559   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3560
3561   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3562                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3563   tem = expand_and (mode, tem, op1, NULL_RTX);
3564   adj_operand
3565     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3566                      adj_operand);
3567
3568   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3569                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3570   tem = expand_and (mode, tem, op0, NULL_RTX);
3571   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3572                           target);
3573
3574   return target;
3575 }
3576
3577 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3578
3579 static rtx
3580 extract_high_half (machine_mode mode, rtx op)
3581 {
3582   machine_mode wider_mode;
3583
3584   if (mode == word_mode)
3585     return gen_highpart (mode, op);
3586
3587   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3588
3589   wider_mode = GET_MODE_WIDER_MODE (mode);
3590   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3591                      GET_MODE_BITSIZE (mode), 0, 1);
3592   return convert_modes (mode, wider_mode, op, 0);
3593 }
3594
3595 /* Like expmed_mult_highpart, but only consider using a multiplication
3596    optab.  OP1 is an rtx for the constant operand.  */
3597
3598 static rtx
3599 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3600                             rtx target, int unsignedp, int max_cost)
3601 {
3602   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3603   machine_mode wider_mode;
3604   optab moptab;
3605   rtx tem;
3606   int size;
3607   bool speed = optimize_insn_for_speed_p ();
3608
3609   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3610
3611   wider_mode = GET_MODE_WIDER_MODE (mode);
3612   size = GET_MODE_BITSIZE (mode);
3613
3614   /* Firstly, try using a multiplication insn that only generates the needed
3615      high part of the product, and in the sign flavor of unsignedp.  */
3616   if (mul_highpart_cost (speed, mode) < max_cost)
3617     {
3618       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3619       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3620                           unsignedp, OPTAB_DIRECT);
3621       if (tem)
3622         return tem;
3623     }
3624
3625   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3626      Need to adjust the result after the multiplication.  */
3627   if (size - 1 < BITS_PER_WORD
3628       && (mul_highpart_cost (speed, mode)
3629           + 2 * shift_cost (speed, mode, size-1)
3630           + 4 * add_cost (speed, mode) < max_cost))
3631     {
3632       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3633       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3634                           unsignedp, OPTAB_DIRECT);
3635       if (tem)
3636         /* We used the wrong signedness.  Adjust the result.  */
3637         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3638                                             tem, unsignedp);
3639     }
3640
3641   /* Try widening multiplication.  */
3642   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3643   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3644       && mul_widen_cost (speed, wider_mode) < max_cost)
3645     {
3646       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3647                           unsignedp, OPTAB_WIDEN);
3648       if (tem)
3649         return extract_high_half (mode, tem);
3650     }
3651
3652   /* Try widening the mode and perform a non-widening multiplication.  */
3653   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3654       && size - 1 < BITS_PER_WORD
3655       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3656           < max_cost))
3657     {
3658       rtx_insn *insns;
3659       rtx wop0, wop1;
3660
3661       /* We need to widen the operands, for example to ensure the
3662          constant multiplier is correctly sign or zero extended.
3663          Use a sequence to clean-up any instructions emitted by
3664          the conversions if things don't work out.  */
3665       start_sequence ();
3666       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3667       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3668       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3669                           unsignedp, OPTAB_WIDEN);
3670       insns = get_insns ();
3671       end_sequence ();
3672
3673       if (tem)
3674         {
3675           emit_insn (insns);
3676           return extract_high_half (mode, tem);
3677         }
3678     }
3679
3680   /* Try widening multiplication of opposite signedness, and adjust.  */
3681   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3682   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3683       && size - 1 < BITS_PER_WORD
3684       && (mul_widen_cost (speed, wider_mode)
3685           + 2 * shift_cost (speed, mode, size-1)
3686           + 4 * add_cost (speed, mode) < max_cost))
3687     {
3688       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3689                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3690       if (tem != 0)
3691         {
3692           tem = extract_high_half (mode, tem);
3693           /* We used the wrong signedness.  Adjust the result.  */
3694           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3695                                               target, unsignedp);
3696         }
3697     }
3698
3699   return 0;
3700 }
3701
3702 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3703    putting the high half of the result in TARGET if that is convenient,
3704    and return where the result is.  If the operation can not be performed,
3705    0 is returned.
3706
3707    MODE is the mode of operation and result.
3708
3709    UNSIGNEDP nonzero means unsigned multiply.
3710
3711    MAX_COST is the total allowed cost for the expanded RTL.  */
3712
3713 static rtx
3714 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3715                       rtx target, int unsignedp, int max_cost)
3716 {
3717   machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3718   unsigned HOST_WIDE_INT cnst1;
3719   int extra_cost;
3720   bool sign_adjust = false;
3721   enum mult_variant variant;
3722   struct algorithm alg;
3723   rtx tem;
3724   bool speed = optimize_insn_for_speed_p ();
3725
3726   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3727   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3728   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3729
3730   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3731
3732   /* We can't optimize modes wider than BITS_PER_WORD.
3733      ??? We might be able to perform double-word arithmetic if
3734      mode == word_mode, however all the cost calculations in
3735      synth_mult etc. assume single-word operations.  */
3736   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3737     return expmed_mult_highpart_optab (mode, op0, op1, target,
3738                                        unsignedp, max_cost);
3739
3740   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3741
3742   /* Check whether we try to multiply by a negative constant.  */
3743   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3744     {
3745       sign_adjust = true;
3746       extra_cost += add_cost (speed, mode);
3747     }
3748
3749   /* See whether shift/add multiplication is cheap enough.  */
3750   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3751                            max_cost - extra_cost))
3752     {
3753       /* See whether the specialized multiplication optabs are
3754          cheaper than the shift/add version.  */
3755       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3756                                         alg.cost.cost + extra_cost);
3757       if (tem)
3758         return tem;
3759
3760       tem = convert_to_mode (wider_mode, op0, unsignedp);
3761       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3762       tem = extract_high_half (mode, tem);
3763
3764       /* Adjust result for signedness.  */
3765       if (sign_adjust)
3766         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3767
3768       return tem;
3769     }
3770   return expmed_mult_highpart_optab (mode, op0, op1, target,
3771                                      unsignedp, max_cost);
3772 }
3773
3774
3775 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3776
3777 static rtx
3778 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3779 {
3780   rtx result, temp, shift;
3781   rtx_code_label *label;
3782   int logd;
3783   int prec = GET_MODE_PRECISION (mode);
3784
3785   logd = floor_log2 (d);
3786   result = gen_reg_rtx (mode);
3787
3788   /* Avoid conditional branches when they're expensive.  */
3789   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3790       && optimize_insn_for_speed_p ())
3791     {
3792       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3793                                       mode, 0, -1);
3794       if (signmask)
3795         {
3796           HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3797           signmask = force_reg (mode, signmask);
3798           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3799
3800           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3801              which instruction sequence to use.  If logical right shifts
3802              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3803              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3804
3805           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3806           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3807               || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3808                   > COSTS_N_INSNS (2)))
3809             {
3810               temp = expand_binop (mode, xor_optab, op0, signmask,
3811                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3812               temp = expand_binop (mode, sub_optab, temp, signmask,
3813                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3814               temp = expand_binop (mode, and_optab, temp,
3815                                    gen_int_mode (masklow, mode),
3816                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3817               temp = expand_binop (mode, xor_optab, temp, signmask,
3818                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3819               temp = expand_binop (mode, sub_optab, temp, signmask,
3820                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3821             }
3822           else
3823             {
3824               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3825                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3826               signmask = force_reg (mode, signmask);
3827
3828               temp = expand_binop (mode, add_optab, op0, signmask,
3829                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3830               temp = expand_binop (mode, and_optab, temp,
3831                                    gen_int_mode (masklow, mode),
3832                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3833               temp = expand_binop (mode, sub_optab, temp, signmask,
3834                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3835             }
3836           return temp;
3837         }
3838     }
3839
3840   /* Mask contains the mode's signbit and the significant bits of the
3841      modulus.  By including the signbit in the operation, many targets
3842      can avoid an explicit compare operation in the following comparison
3843      against zero.  */
3844   wide_int mask = wi::mask (logd, false, prec);
3845   mask = wi::set_bit (mask, prec - 1);
3846
3847   temp = expand_binop (mode, and_optab, op0,
3848                        immed_wide_int_const (mask, mode),
3849                        result, 1, OPTAB_LIB_WIDEN);
3850   if (temp != result)
3851     emit_move_insn (result, temp);
3852
3853   label = gen_label_rtx ();
3854   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3855
3856   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3857                        0, OPTAB_LIB_WIDEN);
3858
3859   mask = wi::mask (logd, true, prec);
3860   temp = expand_binop (mode, ior_optab, temp,
3861                        immed_wide_int_const (mask, mode),
3862                        result, 1, OPTAB_LIB_WIDEN);
3863   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3864                        0, OPTAB_LIB_WIDEN);
3865   if (temp != result)
3866     emit_move_insn (result, temp);
3867   emit_label (label);
3868   return result;
3869 }
3870
3871 /* Expand signed division of OP0 by a power of two D in mode MODE.
3872    This routine is only called for positive values of D.  Branch-free
3873    expansions are tried first; if emit_store_flag or emit_conditional_move
3874    fails (returns 0), fall through to the next, ending with a branch.  */
3875
3876 static rtx
3877 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3878 {
3879   rtx temp;
3880   int logd = floor_log2 (d);
3881
3882   if (d == 2 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 1)
3883     {
3884       /* D - 1 is 1 here, so the 0/1 flag for OP0 < 0 is the bias itself.  */
3885       temp = gen_reg_rtx (mode);
3886       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3887       if (temp != NULL_RTX)
3888         {
3889           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3890                                0, OPTAB_LIB_WIDEN);
3891           return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3892         }
3893     }
3894
3895   if (HAVE_conditional_move
3896       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3897     {
3898       start_sequence ();
3899       rtx temp2 = copy_to_mode_reg (mode, op0);
3900       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3901                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3902       temp = force_reg (mode, temp);
3903       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3904       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3905                                      mode, temp, temp2, mode, 0);
3906       if (temp2)
3907         {
3908           rtx_insn *seq = get_insns ();
3909           end_sequence ();
3910           emit_insn (seq);
3911           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3912         }
3913       end_sequence ();
3914     }
3915
3916   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3917     {
3918       int ushift = GET_MODE_BITSIZE (mode) - logd;
3919       /* Turn the all-ones flag for OP0 < 0 into the bias D - 1.  */
3920       temp = gen_reg_rtx (mode);
3921       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3922       if (temp != NULL_RTX)
3923         {
3924           if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3925               || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3926                  > COSTS_N_INSNS (1))
3927             temp = expand_binop (mode, and_optab, temp,
3928                                  gen_int_mode (d - 1, mode),
3929                                  NULL_RTX, 0, OPTAB_LIB_WIDEN);
3930           else
3931             temp = expand_shift (RSHIFT_EXPR, mode, temp, ushift, NULL_RTX, 1);
3932           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3933                                0, OPTAB_LIB_WIDEN);
3934           return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3935         }
3936     }
3937
3938   rtx_code_label *label = gen_label_rtx ();
3939   temp = copy_to_mode_reg (mode, op0);
3940   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3941   expand_inc (temp, gen_int_mode (d - 1, mode));
3942   emit_label (label);
3943   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3944 }
3945 \f
3946 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3947    if that is convenient, and returning where the result is.
3948    You may request either the quotient or the remainder as the result;
3949    specify REM_FLAG nonzero to get the remainder.
3950
3951    CODE is the expression code for which kind of division this is;
3952    it controls how rounding is done.  MODE is the machine mode to use.
3953    UNSIGNEDP nonzero means do unsigned division.  */
3954
3955 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3956    and then correct it by or'ing in missing high bits
3957    if result of ANDI is nonzero.
3958    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3959    This could optimize to a bfexts instruction.
3960    But C doesn't use these operations, so their optimizations are
3961    left for later.  */
3962 /* ??? For modulo, we don't actually need the highpart of the first product,
3963    the low part will do nicely.  And for small divisors, the second multiply
3964    can also be a low-part only multiply or even be completely left out.
3965    E.g. to calculate the remainder of a division by 3 with a 32 bit
3966    multiply, multiply with 0x55555556 and extract the upper two bits;
3967    the result is exact for inputs up to 0x1fffffff.
3968    The input range can be reduced by using cross-sum rules.
3969    For odd divisors >= 3, the following table gives right shift counts
3970    so that if a number is shifted by an integer multiple of the given
3971    amount, the remainder stays the same:
3972    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3973    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3974    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3975    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3976    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3977
3978    Cross-sum rules for even numbers can be derived by leaving as many bits
3979    to the right alone as the divisor has zeros to the right.
3980    E.g. if x is an unsigned 32 bit number:
3981    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3982    */
3983
3984 rtx
3985 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3986                rtx op0, rtx op1, rtx target, int unsignedp)
3987 {
3988   machine_mode compute_mode;
3989   rtx tquotient;
3990   rtx quotient = 0, remainder = 0;
3991   rtx_insn *last;
3992   int size;
3993   rtx_insn *insn;
3994   optab optab1, optab2;
3995   int op1_is_constant, op1_is_pow2 = 0;
3996   int max_cost, extra_cost;
3997   static HOST_WIDE_INT last_div_const = 0;
3998   bool speed = optimize_insn_for_speed_p ();
3999
4000   op1_is_constant = CONST_INT_P (op1);
4001   if (op1_is_constant)
4002     {
4003       wide_int ext_op1 = rtx_mode_t (op1, mode);
4004       op1_is_pow2 = (wi::popcount (ext_op1) == 1
4005                      || (! unsignedp
4006                          && wi::popcount (wi::neg (ext_op1)) == 1));
4007     }
4008
4009   /*
4010      This is the structure of expand_divmod:
4011
4012      First comes code to fix up the operands so we can perform the operations
4013      correctly and efficiently.
4014
4015      Second comes a switch statement with code specific for each rounding mode.
4016      For some special operands this code emits all RTL for the desired
4017      operation, for other cases, it generates only a quotient and stores it in
4018      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4019      to indicate that it has not done anything.
4020
4021      Last comes code that finishes the operation.  If QUOTIENT is set and
4022      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4023      QUOTIENT is not set, it is computed using trunc rounding.
4024
4025      We try to generate special code for division and remainder when OP1 is a
4026      constant.  If |OP1| = 2**n we can use shifts and some other fast
4027      operations.  For other values of OP1, we compute a carefully selected
4028      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4029      by m.
4030
4031      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4032      half of the product.  Different strategies for generating the product are
4033      implemented in expmed_mult_highpart.
4034
4035      If what we actually want is the remainder, we generate that by another
4036      by-constant multiplication and a subtraction.  */
4037
4038   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4039      code below will malfunction if we are, so check here and handle
4040      the special case if so.  */
4041   if (op1 == const1_rtx)
4042     return rem_flag ? const0_rtx : op0;
4043
4044     /* When dividing by -1, we could get an overflow.
4045      negv_optab can handle overflows.  */
4046   if (! unsignedp && op1 == constm1_rtx)
4047     {
4048       if (rem_flag)
4049         return const0_rtx;
4050       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4051                           ? negv_optab : neg_optab, op0, target, 0);
4052     }
4053
4054   if (target
4055       /* Don't use the function value register as a target
4056          since we have to read it as well as write it,
4057          and function-inlining gets confused by this.  */
4058       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4059           /* Don't clobber an operand while doing a multi-step calculation.  */
4060           || ((rem_flag || op1_is_constant)
4061               && (reg_mentioned_p (target, op0)
4062                   || (MEM_P (op0) && MEM_P (target))))
4063           || reg_mentioned_p (target, op1)
4064           || (MEM_P (op1) && MEM_P (target))))
4065     target = 0;
4066
4067   /* Get the mode in which to perform this computation.  Normally it will
4068      be MODE, but sometimes we can't do the desired operation in MODE.
4069      If so, pick a wider mode in which we can do the operation.  Convert
4070      to that mode at the start to avoid repeated conversions.
4071
4072      First see what operations we need.  These depend on the expression
4073      we are evaluating.  (We assume that divxx3 insns exist under the
4074      same conditions that modxx3 insns and that these insns don't normally
4075      fail.  If these assumptions are not correct, we may generate less
4076      efficient code in some cases.)
4077
4078      Then see if we find a mode in which we can open-code that operation
4079      (either a division, modulus, or shift).  Finally, check for the smallest
4080      mode for which we can do the operation with a library call.  */
4081
4082   /* We might want to refine this now that we have division-by-constant
4083      optimization.  Since expmed_mult_highpart tries so many variants, it is
4084      not straightforward to generalize this.  Maybe we should make an array
4085      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4086
4087   optab1 = (op1_is_pow2
4088             ? (unsignedp ? lshr_optab : ashr_optab)
4089             : (unsignedp ? udiv_optab : sdiv_optab));
4090   optab2 = (op1_is_pow2 ? optab1
4091             : (unsignedp ? udivmod_optab : sdivmod_optab));
4092
4093   for (compute_mode = mode; compute_mode != VOIDmode;
4094        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4095     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4096         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4097       break;
4098
4099   if (compute_mode == VOIDmode)
4100     for (compute_mode = mode; compute_mode != VOIDmode;
4101          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4102       if (optab_libfunc (optab1, compute_mode)
4103           || optab_libfunc (optab2, compute_mode))
4104         break;
4105
4106   /* If we still couldn't find a mode, use MODE, but expand_binop will
4107      probably die.  */
4108   if (compute_mode == VOIDmode)
4109     compute_mode = mode;
4110
4111   if (target && GET_MODE (target) == compute_mode)
4112     tquotient = target;
4113   else
4114     tquotient = gen_reg_rtx (compute_mode);
4115
4116   size = GET_MODE_BITSIZE (compute_mode);
4117 #if 0
4118   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4119      (mode), and thereby get better code when OP1 is a constant.  Do that
4120      later.  It will require going over all usages of SIZE below.  */
4121   size = GET_MODE_BITSIZE (mode);
4122 #endif
4123
4124   /* Only deduct something for a REM if the last divide done was
4125      for a different constant.   Then set the constant of the last
4126      divide.  */
4127   max_cost = (unsignedp
4128               ? udiv_cost (speed, compute_mode)
4129               : sdiv_cost (speed, compute_mode));
4130   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4131                      && INTVAL (op1) == last_div_const))
4132     max_cost -= (mul_cost (speed, compute_mode)
4133                  + add_cost (speed, compute_mode));
4134
4135   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4136
4137   /* Now convert to the best mode to use.  */
4138   if (compute_mode != mode)
4139     {
4140       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4141       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4142
4143       /* convert_modes may have placed op1 into a register, so we
4144          must recompute the following.  */
4145       op1_is_constant = CONST_INT_P (op1);
4146       if (op1_is_constant)
4147         {
4148           wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4149           op1_is_pow2 = (wi::popcount (ext_op1) == 1
4150                          || (! unsignedp
4151                              && wi::popcount (wi::neg (ext_op1)) == 1));
4152         }
4153       else
4154         op1_is_pow2 = 0;
4155     }
4156
4157   /* If one of the operands is a volatile MEM, copy it into a register.  */
4158
4159   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4160     op0 = force_reg (compute_mode, op0);
4161   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4162     op1 = force_reg (compute_mode, op1);
4163
4164   /* If we need the remainder or if OP1 is constant, we need to
4165      put OP0 in a register in case it has any queued subexpressions.  */
4166   if (rem_flag || op1_is_constant)
4167     op0 = force_reg (compute_mode, op0);
4168
4169   last = get_last_insn ();
4170
4171   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4172   if (unsignedp)
4173     {
4174       if (code == FLOOR_DIV_EXPR)
4175         code = TRUNC_DIV_EXPR;
4176       if (code == FLOOR_MOD_EXPR)
4177         code = TRUNC_MOD_EXPR;
4178       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4179         code = TRUNC_DIV_EXPR;
4180     }
4181
4182   if (op1 != const0_rtx)
4183     switch (code)
4184       {
4185       case TRUNC_MOD_EXPR:
4186       case TRUNC_DIV_EXPR:
4187         if (op1_is_constant)
4188           {
4189             if (unsignedp)
4190               {
4191                 unsigned HOST_WIDE_INT mh, ml;
4192                 int pre_shift, post_shift;
4193                 int dummy;
4194                 wide_int wd = rtx_mode_t (op1, compute_mode);
4195                 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4196
4197                 if (wi::popcount (wd) == 1)
4198                   {
4199                     pre_shift = floor_log2 (d);
4200                     if (rem_flag)
4201                       {
4202                         unsigned HOST_WIDE_INT mask
4203                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4204                         remainder
4205                           = expand_binop (compute_mode, and_optab, op0,
4206                                           gen_int_mode (mask, compute_mode),
4207                                           remainder, 1,
4208                                           OPTAB_LIB_WIDEN);
4209                         if (remainder)
4210                           return gen_lowpart (mode, remainder);
4211                       }
4212                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4213                                              pre_shift, tquotient, 1);
4214                   }
4215                 else if (size <= HOST_BITS_PER_WIDE_INT)
4216                   {
4217                     if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4218                       {
4219                         /* Most significant bit of divisor is set; emit an scc
4220                            insn.  */
4221                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4222                                                           compute_mode, 1, 1);
4223                       }
4224                     else
4225                       {
4226                         /* Find a suitable multiplier and right shift count
4227                            instead of multiplying with D.  */
4228
4229                         mh = choose_multiplier (d, size, size,
4230                                                 &ml, &post_shift, &dummy);
4231
4232                         /* If the suggested multiplier is more than SIZE bits,
4233                            we can do better for even divisors, using an
4234                            initial right shift.  */
4235                         if (mh != 0 && (d & 1) == 0)
4236                           {
4237                             pre_shift = ctz_or_zero (d);
4238                             mh = choose_multiplier (d >> pre_shift, size,
4239                                                     size - pre_shift,
4240                                                     &ml, &post_shift, &dummy);
4241                             gcc_assert (!mh);
4242                           }
4243                         else
4244                           pre_shift = 0;
4245
4246                         if (mh != 0)
4247                           {
4248                             rtx t1, t2, t3, t4;
4249
4250                             if (post_shift - 1 >= BITS_PER_WORD)
4251                               goto fail1;
4252
4253                             extra_cost
4254                               = (shift_cost (speed, compute_mode, post_shift - 1)
4255                                  + shift_cost (speed, compute_mode, 1)
4256                                  + 2 * add_cost (speed, compute_mode));
4257                             t1 = expmed_mult_highpart
4258                               (compute_mode, op0,
4259                                gen_int_mode (ml, compute_mode),
4260                                NULL_RTX, 1, max_cost - extra_cost);
4261                             if (t1 == 0)
4262                               goto fail1;
4263                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4264                                                                op0, t1),
4265                                                 NULL_RTX);
4266                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4267                                                t2, 1, NULL_RTX, 1);
4268                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4269                                                               t1, t3),
4270                                                 NULL_RTX);
4271                             quotient = expand_shift
4272                               (RSHIFT_EXPR, compute_mode, t4,
4273                                post_shift - 1, tquotient, 1);
4274                           }
4275                         else
4276                           {
4277                             rtx t1, t2;
4278
4279                             if (pre_shift >= BITS_PER_WORD
4280                                 || post_shift >= BITS_PER_WORD)
4281                               goto fail1;
4282
4283                             t1 = expand_shift
4284                               (RSHIFT_EXPR, compute_mode, op0,
4285                                pre_shift, NULL_RTX, 1);
4286                             extra_cost
4287                               = (shift_cost (speed, compute_mode, pre_shift)
4288                                  + shift_cost (speed, compute_mode, post_shift));
4289                             t2 = expmed_mult_highpart
4290                               (compute_mode, t1,
4291                                gen_int_mode (ml, compute_mode),
4292                                NULL_RTX, 1, max_cost - extra_cost);
4293                             if (t2 == 0)
4294                               goto fail1;
4295                             quotient = expand_shift
4296                               (RSHIFT_EXPR, compute_mode, t2,
4297                                post_shift, tquotient, 1);
4298                           }
4299                       }
4300                   }
4301                 else            /* Too wide mode to use tricky code */
4302                   break;
4303
4304                 insn = get_last_insn ();
4305                 if (insn != last)
4306                   set_dst_reg_note (insn, REG_EQUAL,
4307                                     gen_rtx_UDIV (compute_mode, op0, op1),
4308                                     quotient);
4309               }
4310             else                /* TRUNC_DIV, signed */
4311               {
4312                 unsigned HOST_WIDE_INT ml;
4313                 int lgup, post_shift;
4314                 rtx mlr;
4315                 HOST_WIDE_INT d = INTVAL (op1);
4316                 unsigned HOST_WIDE_INT abs_d;
4317
4318                 /* Since d might be INT_MIN, we have to cast to
4319                    unsigned HOST_WIDE_INT before negating to avoid
4320                    undefined signed overflow.  */
4321                 abs_d = (d >= 0
4322                          ? (unsigned HOST_WIDE_INT) d
4323                          : - (unsigned HOST_WIDE_INT) d);
4324
4325                 /* n rem d = n rem -d */
4326                 if (rem_flag && d < 0)
4327                   {
4328                     d = abs_d;
4329                     op1 = gen_int_mode (abs_d, compute_mode);
4330                   }
4331
4332                 if (d == 1)
4333                   quotient = op0;
4334                 else if (d == -1)
4335                   quotient = expand_unop (compute_mode, neg_optab, op0,
4336                                           tquotient, 0);
4337                 else if (size <= HOST_BITS_PER_WIDE_INT
4338                          && abs_d == HOST_WIDE_INT_1U << (size - 1))
4339                   {
4340                     /* This case is not handled correctly below.  */
4341                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4342                                                 compute_mode, 1, 1);
4343                     if (quotient == 0)
4344                       goto fail1;
4345                   }
4346                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4347                          && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4348                          && (rem_flag
4349                              ? smod_pow2_cheap (speed, compute_mode)
4350                              : sdiv_pow2_cheap (speed, compute_mode))
4351                          /* We assume that cheap metric is true if the
4352                             optab has an expander for this mode.  */
4353                          && ((optab_handler ((rem_flag ? smod_optab
4354                                               : sdiv_optab),
4355                                              compute_mode)
4356                               != CODE_FOR_nothing)
4357                              || (optab_handler (sdivmod_optab,
4358                                                 compute_mode)
4359                                  != CODE_FOR_nothing)))
4360                   ;
4361                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)
4362                          && (size <= HOST_BITS_PER_WIDE_INT
4363                              || abs_d != (unsigned HOST_WIDE_INT) d))
4364                   {
4365                     if (rem_flag)
4366                       {
4367                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4368                         if (remainder)
4369                           return gen_lowpart (mode, remainder);
4370                       }
4371
4372                     if (sdiv_pow2_cheap (speed, compute_mode)
4373                         && ((optab_handler (sdiv_optab, compute_mode)
4374                              != CODE_FOR_nothing)
4375                             || (optab_handler (sdivmod_optab, compute_mode)
4376                                 != CODE_FOR_nothing)))
4377                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4378                                                 compute_mode, op0,
4379                                                 gen_int_mode (abs_d,
4380                                                               compute_mode),
4381                                                 NULL_RTX, 0);
4382                     else
4383                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4384
4385                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4386                        negate the quotient.  */
4387                     if (d < 0)
4388                       {
4389                         insn = get_last_insn ();
4390                         if (insn != last
4391                             && abs_d < (HOST_WIDE_INT_1U
4392                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4393                           set_dst_reg_note (insn, REG_EQUAL,
4394                                             gen_rtx_DIV (compute_mode, op0,
4395                                                          gen_int_mode
4396                                                            (abs_d,
4397                                                             compute_mode)),
4398                                             quotient);
4399
4400                         quotient = expand_unop (compute_mode, neg_optab,
4401                                                 quotient, quotient, 0);
4402                       }
4403                   }
4404                 else if (size <= HOST_BITS_PER_WIDE_INT)
4405                   {
4406                     choose_multiplier (abs_d, size, size - 1,
4407                                        &ml, &post_shift, &lgup);
4408                     if (ml < HOST_WIDE_INT_1U << (size - 1))
4409                       {
4410                         rtx t1, t2, t3;
4411
4412                         if (post_shift >= BITS_PER_WORD
4413                             || size - 1 >= BITS_PER_WORD)
4414                           goto fail1;
4415
4416                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4417                                       + shift_cost (speed, compute_mode, size - 1)
4418                                       + add_cost (speed, compute_mode));
4419                         t1 = expmed_mult_highpart
4420                           (compute_mode, op0, gen_int_mode (ml, compute_mode),
4421                            NULL_RTX, 0, max_cost - extra_cost);
4422                         if (t1 == 0)
4423                           goto fail1;
4424                         t2 = expand_shift
4425                           (RSHIFT_EXPR, compute_mode, t1,
4426                            post_shift, NULL_RTX, 0);
4427                         t3 = expand_shift
4428                           (RSHIFT_EXPR, compute_mode, op0,
4429                            size - 1, NULL_RTX, 0);
4430                         if (d < 0)
4431                           quotient
4432                             = force_operand (gen_rtx_MINUS (compute_mode,
4433                                                             t3, t2),
4434                                              tquotient);
4435                         else
4436                           quotient
4437                             = force_operand (gen_rtx_MINUS (compute_mode,
4438                                                             t2, t3),
4439                                              tquotient);
4440                       }
4441                     else
4442                       {
4443                         rtx t1, t2, t3, t4;
4444
4445                         if (post_shift >= BITS_PER_WORD
4446                             || size - 1 >= BITS_PER_WORD)
4447                           goto fail1;
4448
4449                         ml |= HOST_WIDE_INT_M1U << (size - 1);
4450                         mlr = gen_int_mode (ml, compute_mode);
4451                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4452                                       + shift_cost (speed, compute_mode, size - 1)
4453                                       + 2 * add_cost (speed, compute_mode));
4454                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4455                                                    NULL_RTX, 0,
4456                                                    max_cost - extra_cost);
4457                         if (t1 == 0)
4458                           goto fail1;
4459                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4460                                                           t1, op0),
4461                                             NULL_RTX);
4462                         t3 = expand_shift
4463                           (RSHIFT_EXPR, compute_mode, t2,
4464                            post_shift, NULL_RTX, 0);
4465                         t4 = expand_shift
4466                           (RSHIFT_EXPR, compute_mode, op0,
4467                            size - 1, NULL_RTX, 0);
4468                         if (d < 0)
4469                           quotient
4470                             = force_operand (gen_rtx_MINUS (compute_mode,
4471                                                             t4, t3),
4472                                              tquotient);
4473                         else
4474                           quotient
4475                             = force_operand (gen_rtx_MINUS (compute_mode,
4476                                                             t3, t4),
4477                                              tquotient);
4478                       }
4479                   }
4480                 else            /* Too wide mode to use tricky code */
4481                   break;
4482
4483                 insn = get_last_insn ();
4484                 if (insn != last)
4485                   set_dst_reg_note (insn, REG_EQUAL,
4486                                     gen_rtx_DIV (compute_mode, op0, op1),
4487                                     quotient);
4488               }
4489             break;
4490           }
4491       fail1:
4492         delete_insns_since (last);
4493         break;
4494
4495       case FLOOR_DIV_EXPR:
4496       case FLOOR_MOD_EXPR:
4497       /* We will come here only for signed operations.  */
4498         if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4499           {
4500             unsigned HOST_WIDE_INT mh, ml;
4501             int pre_shift, lgup, post_shift;
4502             HOST_WIDE_INT d = INTVAL (op1);
4503
4504             if (d > 0)
4505               {
4506                 /* We could just as easily deal with negative constants here,
4507                    but it does not seem worth the trouble for GCC 2.6.  */
4508                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4509                   {
4510                     pre_shift = floor_log2 (d);
4511                     if (rem_flag)
4512                       {
4513                         unsigned HOST_WIDE_INT mask
4514                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4515                         remainder = expand_binop
4516                           (compute_mode, and_optab, op0,
4517                            gen_int_mode (mask, compute_mode),
4518                            remainder, 0, OPTAB_LIB_WIDEN);
4519                         if (remainder)
4520                           return gen_lowpart (mode, remainder);
4521                       }
4522                     quotient = expand_shift
4523                       (RSHIFT_EXPR, compute_mode, op0,
4524                        pre_shift, tquotient, 0);
4525                   }
4526                 else
4527                   {
4528                     rtx t1, t2, t3, t4;
4529
4530                     mh = choose_multiplier (d, size, size - 1,
4531                                             &ml, &post_shift, &lgup);
4532                     gcc_assert (!mh);
4533
4534                     if (post_shift < BITS_PER_WORD
4535                         && size - 1 < BITS_PER_WORD)
4536                       {
4537                         t1 = expand_shift
4538                           (RSHIFT_EXPR, compute_mode, op0,
4539                            size - 1, NULL_RTX, 0);
4540                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4541                                            NULL_RTX, 0, OPTAB_WIDEN);
4542                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4543                                       + shift_cost (speed, compute_mode, size - 1)
4544                                       + 2 * add_cost (speed, compute_mode));
4545                         t3 = expmed_mult_highpart
4546                           (compute_mode, t2, gen_int_mode (ml, compute_mode),
4547                            NULL_RTX, 1, max_cost - extra_cost);
4548                         if (t3 != 0)
4549                           {
4550                             t4 = expand_shift
4551                               (RSHIFT_EXPR, compute_mode, t3,
4552                                post_shift, NULL_RTX, 1);
4553                             quotient = expand_binop (compute_mode, xor_optab,
4554                                                      t4, t1, tquotient, 0,
4555                                                      OPTAB_WIDEN);
4556                           }
4557                       }
4558                   }
4559               }
4560             else
4561               {
4562                 rtx nsign, t1, t2, t3, t4;
4563                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4564                                                   op0, constm1_rtx), NULL_RTX);
4565                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4566                                    0, OPTAB_WIDEN);
4567                 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
4568                                       size - 1, NULL_RTX, 0);
4569                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4570                                     NULL_RTX);
4571                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4572                                     NULL_RTX, 0);
4573                 if (t4)
4574                   {
4575                     rtx t5;
4576                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4577                                       NULL_RTX, 0);
4578                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4579                                                             t4, t5),
4580                                               tquotient);
4581                   }
4582               }
4583           }
4584
4585         if (quotient != 0)
4586           break;
4587         delete_insns_since (last);
4588
4589         /* Try using an instruction that produces both the quotient and
4590            remainder, using truncation.  We can easily compensate the quotient
4591            or remainder to get floor rounding, once we have the remainder.
4592            Notice that we compute also the final remainder value here,
4593            and return the result right away.  */
4594         if (target == 0 || GET_MODE (target) != compute_mode)
4595           target = gen_reg_rtx (compute_mode);
4596
4597         if (rem_flag)
4598           {
4599             remainder
4600               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4601             quotient = gen_reg_rtx (compute_mode);
4602           }
4603         else
4604           {
4605             quotient
4606               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4607             remainder = gen_reg_rtx (compute_mode);
4608           }
4609
4610         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4611                                  quotient, remainder, 0))
4612           {
4613             /* This could be computed with a branch-less sequence.
4614                Save that for later.  */
4615             rtx tem;
4616             rtx_code_label *label = gen_label_rtx ();
4617             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4618             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4619                                 NULL_RTX, 0, OPTAB_WIDEN);
4620             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4621             expand_dec (quotient, const1_rtx);
4622             expand_inc (remainder, op1);
4623             emit_label (label);
4624             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4625           }
4626
4627         /* No luck with division elimination or divmod.  Have to do it
4628            by conditionally adjusting op0 *and* the result.  */
4629         {
4630           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4631           rtx adjusted_op0;
4632           rtx tem;
4633
4634           quotient = gen_reg_rtx (compute_mode);
4635           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4636           label1 = gen_label_rtx ();
4637           label2 = gen_label_rtx ();
4638           label3 = gen_label_rtx ();
4639           label4 = gen_label_rtx ();
4640           label5 = gen_label_rtx ();
4641           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4642           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4643           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4644                               quotient, 0, OPTAB_LIB_WIDEN);
4645           if (tem != quotient)
4646             emit_move_insn (quotient, tem);
4647           emit_jump_insn (targetm.gen_jump (label5));
4648           emit_barrier ();
4649           emit_label (label1);
4650           expand_inc (adjusted_op0, const1_rtx);
4651           emit_jump_insn (targetm.gen_jump (label4));
4652           emit_barrier ();
4653           emit_label (label2);
4654           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4655           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4656                               quotient, 0, OPTAB_LIB_WIDEN);
4657           if (tem != quotient)
4658             emit_move_insn (quotient, tem);
4659           emit_jump_insn (targetm.gen_jump (label5));
4660           emit_barrier ();
4661           emit_label (label3);
4662           expand_dec (adjusted_op0, const1_rtx);
4663           emit_label (label4);
4664           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4665                               quotient, 0, OPTAB_LIB_WIDEN);
4666           if (tem != quotient)
4667             emit_move_insn (quotient, tem);
4668           expand_dec (quotient, const1_rtx);
4669           emit_label (label5);
4670         }
4671         break;
4672
4673       case CEIL_DIV_EXPR:
4674       case CEIL_MOD_EXPR:
4675         if (unsignedp)
4676           {
4677             if (op1_is_constant
4678                 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4679                 && (size <= HOST_BITS_PER_WIDE_INT
4680                     || INTVAL (op1) >= 0))
4681               {
4682                 rtx t1, t2, t3;
4683                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4684                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4685                                    floor_log2 (d), tquotient, 1);
4686                 t2 = expand_binop (compute_mode, and_optab, op0,
4687                                    gen_int_mode (d - 1, compute_mode),
4688                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4689                 t3 = gen_reg_rtx (compute_mode);
4690                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4691                                       compute_mode, 1, 1);
4692                 if (t3 == 0)
4693                   {
4694                     rtx_code_label *lab;
4695                     lab = gen_label_rtx ();
4696                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4697                     expand_inc (t1, const1_rtx);
4698                     emit_label (lab);
4699                     quotient = t1;
4700                   }
4701                 else
4702                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4703                                                           t1, t3),
4704                                             tquotient);
4705                 break;
4706               }
4707
4708             /* Try using an instruction that produces both the quotient and
4709                remainder, using truncation.  We can easily compensate the
4710                quotient or remainder to get ceiling rounding, once we have the
4711                remainder.  Notice that we compute also the final remainder
4712                value here, and return the result right away.  */
4713             if (target == 0 || GET_MODE (target) != compute_mode)
4714               target = gen_reg_rtx (compute_mode);
4715
4716             if (rem_flag)
4717               {
4718                 remainder = (REG_P (target)
4719                              ? target : gen_reg_rtx (compute_mode));
4720                 quotient = gen_reg_rtx (compute_mode);
4721               }
4722             else
4723               {
4724                 quotient = (REG_P (target)
4725                             ? target : gen_reg_rtx (compute_mode));
4726                 remainder = gen_reg_rtx (compute_mode);
4727               }
4728
4729             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4730                                      remainder, 1))
4731               {
4732                 /* This could be computed with a branch-less sequence.
4733                    Save that for later.  */
4734                 rtx_code_label *label = gen_label_rtx ();
4735                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4736                                  compute_mode, label);
4737                 expand_inc (quotient, const1_rtx);
4738                 expand_dec (remainder, op1);
4739                 emit_label (label);
4740                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4741               }
4742
4743             /* No luck with division elimination or divmod.  Have to do it
4744                by conditionally adjusting op0 *and* the result.  */
4745             {
4746               rtx_code_label *label1, *label2;
4747               rtx adjusted_op0, tem;
4748
4749               quotient = gen_reg_rtx (compute_mode);
4750               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4751               label1 = gen_label_rtx ();
4752               label2 = gen_label_rtx ();
4753               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4754                                compute_mode, label1);
4755               emit_move_insn  (quotient, const0_rtx);
4756               emit_jump_insn (targetm.gen_jump (label2));
4757               emit_barrier ();
4758               emit_label (label1);
4759               expand_dec (adjusted_op0, const1_rtx);
4760               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4761                                   quotient, 1, OPTAB_LIB_WIDEN);
4762               if (tem != quotient)
4763                 emit_move_insn (quotient, tem);
4764               expand_inc (quotient, const1_rtx);
4765               emit_label (label2);
4766             }
4767           }
4768         else /* signed */
4769           {
4770             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4771                 && INTVAL (op1) >= 0)
4772               {
4773                 /* This is extremely similar to the code for the unsigned case
4774                    above.  For 2.7 we should merge these variants, but for
4775                    2.6.1 I don't want to touch the code for unsigned since that
4776                    get used in C.  The signed case will only be used by other
4777                    languages (Ada).  */
4778
4779                 rtx t1, t2, t3;
4780                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4781                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4782                                    floor_log2 (d), tquotient, 0);
4783                 t2 = expand_binop (compute_mode, and_optab, op0,
4784                                    gen_int_mode (d - 1, compute_mode),
4785                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4786                 t3 = gen_reg_rtx (compute_mode);
4787                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4788                                       compute_mode, 1, 1);
4789                 if (t3 == 0)
4790                   {
4791                     rtx_code_label *lab;
4792                     lab = gen_label_rtx ();
4793                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4794                     expand_inc (t1, const1_rtx);
4795                     emit_label (lab);
4796                     quotient = t1;
4797                   }
4798                 else
4799                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4800                                                           t1, t3),
4801                                             tquotient);
4802                 break;
4803               }
4804
4805             /* Try using an instruction that produces both the quotient and
4806                remainder, using truncation.  We can easily compensate the
4807                quotient or remainder to get ceiling rounding, once we have the
4808                remainder.  Notice that we compute also the final remainder
4809                value here, and return the result right away.  */
4810             if (target == 0 || GET_MODE (target) != compute_mode)
4811               target = gen_reg_rtx (compute_mode);
4812             if (rem_flag)
4813               {
4814                 remainder= (REG_P (target)
4815                             ? target : gen_reg_rtx (compute_mode));
4816                 quotient = gen_reg_rtx (compute_mode);
4817               }
4818             else
4819               {
4820                 quotient = (REG_P (target)
4821                             ? target : gen_reg_rtx (compute_mode));
4822                 remainder = gen_reg_rtx (compute_mode);
4823               }
4824
4825             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4826                                      remainder, 0))
4827               {
4828                 /* This could be computed with a branch-less sequence.
4829                    Save that for later.  */
4830                 rtx tem;
4831                 rtx_code_label *label = gen_label_rtx ();
4832                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4833                                  compute_mode, label);
4834                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4835                                     NULL_RTX, 0, OPTAB_WIDEN);
4836                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4837                 expand_inc (quotient, const1_rtx);
4838                 expand_dec (remainder, op1);
4839                 emit_label (label);
4840                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4841               }
4842
4843             /* No luck with division elimination or divmod.  Have to do it
4844                by conditionally adjusting op0 *and* the result.  */
4845             {
4846               rtx_code_label *label1, *label2, *label3, *label4, *label5;
4847               rtx adjusted_op0;
4848               rtx tem;
4849
4850               quotient = gen_reg_rtx (compute_mode);
4851               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4852               label1 = gen_label_rtx ();
4853               label2 = gen_label_rtx ();
4854               label3 = gen_label_rtx ();
4855               label4 = gen_label_rtx ();
4856               label5 = gen_label_rtx ();
4857               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4858               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4859                                compute_mode, label1);
4860               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4861                                   quotient, 0, OPTAB_LIB_WIDEN);
4862               if (tem != quotient)
4863                 emit_move_insn (quotient, tem);
4864               emit_jump_insn (targetm.gen_jump (label5));
4865               emit_barrier ();
4866               emit_label (label1);
4867               expand_dec (adjusted_op0, const1_rtx);
4868               emit_jump_insn (targetm.gen_jump (label4));
4869               emit_barrier ();
4870               emit_label (label2);
4871               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4872                                compute_mode, label3);
4873               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4874                                   quotient, 0, OPTAB_LIB_WIDEN);
4875               if (tem != quotient)
4876                 emit_move_insn (quotient, tem);
4877               emit_jump_insn (targetm.gen_jump (label5));
4878               emit_barrier ();
4879               emit_label (label3);
4880               expand_inc (adjusted_op0, const1_rtx);
4881               emit_label (label4);
4882               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4883                                   quotient, 0, OPTAB_LIB_WIDEN);
4884               if (tem != quotient)
4885                 emit_move_insn (quotient, tem);
4886               expand_inc (quotient, const1_rtx);
4887               emit_label (label5);
4888             }
4889           }
4890         break;
4891
4892       case EXACT_DIV_EXPR:
4893         if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4894           {
4895             HOST_WIDE_INT d = INTVAL (op1);
4896             unsigned HOST_WIDE_INT ml;
4897             int pre_shift;
4898             rtx t1;
4899
4900             pre_shift = ctz_or_zero (d);
4901             ml = invert_mod2n (d >> pre_shift, size);
4902             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4903                                pre_shift, NULL_RTX, unsignedp);
4904             quotient = expand_mult (compute_mode, t1,
4905                                     gen_int_mode (ml, compute_mode),
4906                                     NULL_RTX, 1);
4907
4908             insn = get_last_insn ();
4909             set_dst_reg_note (insn, REG_EQUAL,
4910                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4911                                               compute_mode, op0, op1),
4912                               quotient);
4913           }
4914         break;
4915
4916       case ROUND_DIV_EXPR:
4917       case ROUND_MOD_EXPR:
4918         if (unsignedp)
4919           {
4920             rtx tem;
4921             rtx_code_label *label;
4922             label = gen_label_rtx ();
4923             quotient = gen_reg_rtx (compute_mode);
4924             remainder = gen_reg_rtx (compute_mode);
4925             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4926               {
4927                 rtx tem;
4928                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4929                                          quotient, 1, OPTAB_LIB_WIDEN);
4930                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4931                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4932                                           remainder, 1, OPTAB_LIB_WIDEN);
4933               }
4934             tem = plus_constant (compute_mode, op1, -1);
4935             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4936             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4937             expand_inc (quotient, const1_rtx);
4938             expand_dec (remainder, op1);
4939             emit_label (label);
4940           }
4941         else
4942           {
4943             rtx abs_rem, abs_op1, tem, mask;
4944             rtx_code_label *label;
4945             label = gen_label_rtx ();
4946             quotient = gen_reg_rtx (compute_mode);
4947             remainder = gen_reg_rtx (compute_mode);
4948             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4949               {
4950                 rtx tem;
4951                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4952                                          quotient, 0, OPTAB_LIB_WIDEN);
4953                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4954                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4955                                           remainder, 0, OPTAB_LIB_WIDEN);
4956               }
4957             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4958             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4959             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4960                                 1, NULL_RTX, 1);
4961             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4962             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4963                                 NULL_RTX, 0, OPTAB_WIDEN);
4964             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4965                                  size - 1, NULL_RTX, 0);
4966             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4967                                 NULL_RTX, 0, OPTAB_WIDEN);
4968             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4969                                 NULL_RTX, 0, OPTAB_WIDEN);
4970             expand_inc (quotient, tem);
4971             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4972                                 NULL_RTX, 0, OPTAB_WIDEN);
4973             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4974                                 NULL_RTX, 0, OPTAB_WIDEN);
4975             expand_dec (remainder, tem);
4976             emit_label (label);
4977           }
4978         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4979
4980       default:
4981         gcc_unreachable ();
4982       }
4983
4984   if (quotient == 0)
4985     {
4986       if (target && GET_MODE (target) != compute_mode)
4987         target = 0;
4988
4989       if (rem_flag)
4990         {
4991           /* Try to produce the remainder without producing the quotient.
4992              If we seem to have a divmod pattern that does not require widening,
4993              don't try widening here.  We should really have a WIDEN argument
4994              to expand_twoval_binop, since what we'd really like to do here is
4995              1) try a mod insn in compute_mode
4996              2) try a divmod insn in compute_mode
4997              3) try a div insn in compute_mode and multiply-subtract to get
4998                 remainder
4999              4) try the same things with widening allowed.  */
5000           remainder
5001             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5002                                  op0, op1, target,
5003                                  unsignedp,
5004                                  ((optab_handler (optab2, compute_mode)
5005                                    != CODE_FOR_nothing)
5006                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
5007           if (remainder == 0)
5008             {
5009               /* No luck there.  Can we do remainder and divide at once
5010                  without a library call?  */
5011               remainder = gen_reg_rtx (compute_mode);
5012               if (! expand_twoval_binop ((unsignedp
5013                                           ? udivmod_optab
5014                                           : sdivmod_optab),
5015                                          op0, op1,
5016                                          NULL_RTX, remainder, unsignedp))
5017                 remainder = 0;
5018             }
5019
5020           if (remainder)
5021             return gen_lowpart (mode, remainder);
5022         }
5023
5024       /* Produce the quotient.  Try a quotient insn, but not a library call.
5025          If we have a divmod in this mode, use it in preference to widening
5026          the div (for this test we assume it will not fail). Note that optab2
5027          is set to the one of the two optabs that the call below will use.  */
5028       quotient
5029         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5030                              op0, op1, rem_flag ? NULL_RTX : target,
5031                              unsignedp,
5032                              ((optab_handler (optab2, compute_mode)
5033                                != CODE_FOR_nothing)
5034                               ? OPTAB_DIRECT : OPTAB_WIDEN));
5035
5036       if (quotient == 0)
5037         {
5038           /* No luck there.  Try a quotient-and-remainder insn,
5039              keeping the quotient alone.  */
5040           quotient = gen_reg_rtx (compute_mode);
5041           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5042                                      op0, op1,
5043                                      quotient, NULL_RTX, unsignedp))
5044             {
5045               quotient = 0;
5046               if (! rem_flag)
5047                 /* Still no luck.  If we are not computing the remainder,
5048                    use a library call for the quotient.  */
5049                 quotient = sign_expand_binop (compute_mode,
5050                                               udiv_optab, sdiv_optab,
5051                                               op0, op1, target,
5052                                               unsignedp, OPTAB_LIB_WIDEN);
5053             }
5054         }
5055     }
5056
5057   if (rem_flag)
5058     {
5059       if (target && GET_MODE (target) != compute_mode)
5060         target = 0;
5061
5062       if (quotient == 0)
5063         {
5064           /* No divide instruction either.  Use library for remainder.  */
5065           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5066                                          op0, op1, target,
5067                                          unsignedp, OPTAB_LIB_WIDEN);
5068           /* No remainder function.  Try a quotient-and-remainder
5069              function, keeping the remainder.  */
5070           if (!remainder)
5071             {
5072               remainder = gen_reg_rtx (compute_mode);
5073               if (!expand_twoval_binop_libfunc
5074                   (unsignedp ? udivmod_optab : sdivmod_optab,
5075                    op0, op1,
5076                    NULL_RTX, remainder,
5077                    unsignedp ? UMOD : MOD))
5078                 remainder = NULL_RTX;
5079             }
5080         }
5081       else
5082         {
5083           /* We divided.  Now finish doing X - Y * (X / Y).  */
5084           remainder = expand_mult (compute_mode, quotient, op1,
5085                                    NULL_RTX, unsignedp);
5086           remainder = expand_binop (compute_mode, sub_optab, op0,
5087                                     remainder, target, unsignedp,
5088                                     OPTAB_LIB_WIDEN);
5089         }
5090     }
5091
5092   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5093 }
5094 \f
5095 /* Return a tree node with data type TYPE, describing the value of X.
5096    Usually this is an VAR_DECL, if there is no obvious better choice.
5097    X may be an expression, however we only support those expressions
5098    generated by loop.c.  */
5099
5100 tree
5101 make_tree (tree type, rtx x)
5102 {
5103   tree t;
5104
5105   switch (GET_CODE (x))
5106     {
5107     case CONST_INT:
5108     case CONST_WIDE_INT:
5109       t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5110       return t;
5111
5112     case CONST_DOUBLE:
5113       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5114       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5115         t = wide_int_to_tree (type,
5116                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5117                                                     HOST_BITS_PER_WIDE_INT * 2));
5118       else
5119         t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5120
5121       return t;
5122
5123     case CONST_VECTOR:
5124       {
5125         int units = CONST_VECTOR_NUNITS (x);
5126         tree itype = TREE_TYPE (type);
5127         tree *elts;
5128         int i;
5129
5130         /* Build a tree with vector elements.  */
5131         elts = XALLOCAVEC (tree, units);
5132         for (i = units - 1; i >= 0; --i)
5133           {
5134             rtx elt = CONST_VECTOR_ELT (x, i);
5135             elts[i] = make_tree (itype, elt);
5136           }
5137
5138         return build_vector (type, elts);
5139       }
5140
5141     case PLUS:
5142       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5143                           make_tree (type, XEXP (x, 1)));
5144
5145     case MINUS:
5146       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5147                           make_tree (type, XEXP (x, 1)));
5148
5149     case NEG:
5150       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5151
5152     case MULT:
5153       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5154                           make_tree (type, XEXP (x, 1)));
5155
5156     case ASHIFT:
5157       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5158                           make_tree (type, XEXP (x, 1)));
5159
5160     case LSHIFTRT:
5161       t = unsigned_type_for (type);
5162       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5163                                          make_tree (t, XEXP (x, 0)),
5164                                          make_tree (type, XEXP (x, 1))));
5165
5166     case ASHIFTRT:
5167       t = signed_type_for (type);
5168       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5169                                          make_tree (t, XEXP (x, 0)),
5170                                          make_tree (type, XEXP (x, 1))));
5171
5172     case DIV:
5173       if (TREE_CODE (type) != REAL_TYPE)
5174         t = signed_type_for (type);
5175       else
5176         t = type;
5177
5178       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5179                                          make_tree (t, XEXP (x, 0)),
5180                                          make_tree (t, XEXP (x, 1))));
5181     case UDIV:
5182       t = unsigned_type_for (type);
5183       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5184                                          make_tree (t, XEXP (x, 0)),
5185                                          make_tree (t, XEXP (x, 1))));
5186
5187     case SIGN_EXTEND:
5188     case ZERO_EXTEND:
5189       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5190                                           GET_CODE (x) == ZERO_EXTEND);
5191       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5192
5193     case CONST:
5194       return make_tree (type, XEXP (x, 0));
5195
5196     case SYMBOL_REF:
5197       t = SYMBOL_REF_DECL (x);
5198       if (t)
5199         return fold_convert (type, build_fold_addr_expr (t));
5200       /* fall through.  */
5201
5202     default:
5203       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5204
5205       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5206          address mode to pointer mode.  */
5207       if (POINTER_TYPE_P (type))
5208         x = convert_memory_address_addr_space
5209               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5210
5211       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5212          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5213       t->decl_with_rtl.rtl = x;
5214
5215       return t;
5216     }
5217 }
5218 \f
5219 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5220    and returning TARGET.
5221
5222    If TARGET is 0, a pseudo-register or constant is returned.

   MODE is the mode in which the AND is performed.  When both operands
   have VOIDmode, simplify_binary_operation is tried first; if that is
   not attempted or yields 0, the AND is emitted with expand_binop using
   and_optab and OPTAB_LIB_WIDEN.  */
5223
5224 rtx
5225 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5226 {
5227   rtx tem = 0;
5228
  /* Only attempt the simplification when neither operand carries a mode
     of its own.  */
5229   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5230     tem = simplify_binary_operation (AND, mode, op0, op1);
5231   if (tem == 0)
5232     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5233
  /* With no TARGET, hand back whatever was produced; otherwise make sure
     the value ends up in TARGET, copying it there if it was computed
     somewhere else.  */
5234   if (target == 0)
5235     target = tem;
5236   else if (tem != target)
5237     emit_move_insn (target, tem);
5238   return target;
5239 }
5240
5241 /* Helper function for emit_store_flag.

   Try to emit the cstore pattern ICODE for the comparison CODE of X and
   Y and return an rtx holding the result.  Return NULL_RTX if X or Y
   cannot be prepared for the pattern or if the pattern cannot be
   expanded; in both failure cases every insn emitted here is deleted
   again.

   X and Y are passed through prepare_operand as operands 2 and 3 of
   ICODE, using MODE, COMPARE_MODE and UNSIGNEDP.  TARGET may be 0, in
   which case a new pseudo of TARGET_MODE is created.  A TARGET_MODE of
   VOIDmode means "use targetm.cstore_mode (ICODE)".  NORMALIZEP selects
   the final normalization of the value; 0 leaves the value as the insn
   produced it.  */
5242 rtx
5243 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5244              machine_mode mode, machine_mode compare_mode,
5245              int unsignedp, rtx x, rtx y, int normalizep,
5246              machine_mode target_mode)
5247 {
5248   struct expand_operand ops[4];
5249   rtx op0, comparison, subtarget;
5250   rtx_insn *last;
5251   machine_mode result_mode = targetm.cstore_mode (icode);
5252
  /* Remember where we started so that a failed attempt can be undone
     with delete_insns_since.  */
5253   last = get_last_insn ();
5254   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5255   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5256   if (!x || !y)
5257     {
5258       delete_insns_since (last);
5259       return NULL_RTX;
5260     }
5261
5262   if (target_mode == VOIDmode)
5263     target_mode = result_mode;
5264   if (!target)
5265     target = gen_reg_rtx (target_mode);
5266
5267   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5268
  /* Operand 0 is the output, operand 1 the comparison rtx itself and
     operands 2 and 3 the values compared.  When optimizing, no
     particular output location is suggested to the expander.  */
5269   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5270   create_fixed_operand (&ops[1], comparison);
5271   create_fixed_operand (&ops[2], x);
5272   create_fixed_operand (&ops[3], y);
5273   if (!maybe_expand_insn (icode, 4, ops))
5274     {
5275       delete_insns_since (last);
5276       return NULL_RTX;
5277     }
5278   subtarget = ops[0].value;
5279
5280   /* If we are converting to a wider mode, first convert to
5281      TARGET_MODE, then normalize.  This produces better combining
5282      opportunities on machines that have a SIGN_EXTRACT when we are
5283      testing a single bit.  This mostly benefits the 68k.
5284
5285      If STORE_FLAG_VALUE does not have the sign bit set when
5286      interpreted in MODE, we can do this conversion as unsigned, which
5287      is usually more efficient.  */
5288   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5289     {
5290       convert_move (target, subtarget,
5291                     val_signbit_known_clear_p (result_mode,
5292                                                STORE_FLAG_VALUE));
5293       op0 = target;
      /* From here on RESULT_MODE names the mode OP0 is held in.  */
5294       result_mode = target_mode;
5295     }
5296   else
5297     op0 = subtarget;
5298
5299   /* If we want to keep subexpressions around, don't reuse our last
5300      target.  */
5301   if (optimize)
5302     subtarget = 0;
5303
5304   /* Now normalize to the proper value in MODE.  Sometimes we don't
5305      have to do anything.  */
5306   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5307     ;
5308   /* STORE_FLAG_VALUE might be the most negative number, so write
5309      the comparison this way to avoid a compiler-time warning.  */
5310   else if (- normalizep == STORE_FLAG_VALUE)
5311     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5312
5313   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5314      it hard to use a value of just the sign bit due to ANSI integer
5315      constant typing rules.  */
5316   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5317     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5318                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5319                         normalizep == 1);
5320   else
5321     {
      /* Otherwise the low bit of STORE_FLAG_VALUE must be set: mask it
         out to get 0/1 and negate that if 0/-1 was requested.  */
5322       gcc_assert (STORE_FLAG_VALUE & 1);
5323
5324       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5325       if (normalizep == -1)
5326         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5327     }
5328
5329   /* If we were converting to a smaller mode, do the conversion now.  */
5330   if (target_mode != result_mode)
5331     {
5332       convert_move (target, op0, 0);
5333       return target;
5334     }
5335   else
5336     return op0;
5337 }
5338
5339
5340 /* A subroutine of emit_store_flag only including "tricks" that do not
5341    need a recursive call.  These are kept separate to avoid infinite
5342    loops.

   TARGET, CODE, OP0, OP1, MODE, UNSIGNEDP and NORMALIZEP are used as in
   emit_store_flag.  TARGET_MODE is the mode wanted for the result;
   emit_store_flag passes GET_MODE (TARGET), or VOIDmode when TARGET
   is 0.  Return 0 when none of the strategies tried here applies or
   succeeds.

   Note that the double-word case below does call emit_store_flag, but
   only for a comparison in word_mode.  */
5343
5344 static rtx
5345 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5346                    machine_mode mode, int unsignedp, int normalizep,
5347                    machine_mode target_mode)
5348 {
5349   rtx subtarget;
5350   enum insn_code icode;
5351   machine_mode compare_mode;
5352   enum mode_class mclass;
5353   enum rtx_code scode;
5354
5355   if (unsignedp)
5356     code = unsigned_condition (code);
  /* SCODE is CODE with its operands exchanged.  It is only used for the
     MODE_FLOAT retry at the end of this function, which also passes
     OP1 and OP0 in exchanged order.  */
5357   scode = swap_condition (code);
5358
5359   /* If one operand is constant, make it the second one.  Only do this
5360      if the other operand is not constant as well.  */
5361
5362   if (swap_commutative_operands_p (op0, op1))
5363     {
5364       std::swap (op0, op1);
5365       code = swap_condition (code);
5366     }
5367
5368   if (mode == VOIDmode)
5369     mode = GET_MODE (op0);
5370
5371   /* For some comparisons with 1 and -1, we can convert this to
5372      comparisons with zero.  This will often produce more opportunities for
5373      store-flag insns.  */
5374
5375   switch (code)
5376     {
5377     case LT:
5378       if (op1 == const1_rtx)
5379         op1 = const0_rtx, code = LE;
5380       break;
5381     case LE:
5382       if (op1 == constm1_rtx)
5383         op1 = const0_rtx, code = LT;
5384       break;
5385     case GE:
5386       if (op1 == const1_rtx)
5387         op1 = const0_rtx, code = GT;
5388       break;
5389     case GT:
5390       if (op1 == constm1_rtx)
5391         op1 = const0_rtx, code = GE;
5392       break;
5393     case GEU:
5394       if (op1 == const1_rtx)
5395         op1 = const0_rtx, code = NE;
5396       break;
5397     case LTU:
5398       if (op1 == const1_rtx)
5399         op1 = const0_rtx, code = EQ;
5400       break;
5401     default:
5402       break;
5403     }
5404
5405   /* If we are comparing a double-word integer with zero or -1, we can
5406      convert the comparison into one involving a single word.  */
5407   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5408       && GET_MODE_CLASS (mode) == MODE_INT
5409       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5410     {
5411       rtx tem;
5412       if ((code == EQ || code == NE)
5413           && (op1 == const0_rtx || op1 == constm1_rtx))
5414         {
5415           rtx op00, op01;
5416
5417           /* Do a logical OR or AND of the two words and compare the
5418              result.  */
          /* IOR is used when comparing against zero, AND when comparing
             against -1.  */
5419           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5420           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5421           tem = expand_binop (word_mode,
5422                               op1 == const0_rtx ? ior_optab : and_optab,
5423                               op00, op01, NULL_RTX, unsignedp,
5424                               OPTAB_DIRECT);
5425
5426           if (tem != 0)
5427             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5428                                    unsignedp, normalizep);
5429         }
5430       else if ((code == LT || code == GE) && op1 == const0_rtx)
5431         {
5432           rtx op0h;
5433
5434           /* If testing the sign bit, can just test on high word.  */
5435           op0h = simplify_gen_subreg (word_mode, op0, mode,
5436                                       subreg_highpart_offset (word_mode,
5437                                                               mode));
5438           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5439                                  unsignedp, normalizep);
5440         }
5441       else
5442         tem = NULL_RTX;
5443
5444       if (tem)
5445         {
          /* The word-sized result is returned as is if no particular
             mode was requested or it already has the requested mode;
             otherwise it is converted into TARGET.  The conversion is
             unsigned unless the "true" value has its sign bit set in
             word_mode.  */
5446           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5447             return tem;
5448           if (!target)
5449             target = gen_reg_rtx (target_mode);
5450
5451           convert_move (target, tem,
5452                         !val_signbit_known_set_p (word_mode,
5453                                                   (normalizep ? normalizep
5454                                                    : STORE_FLAG_VALUE)));
5455           return target;
5456         }
5457     }
5458
5459   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5460      complement of A (for GE) and shifting the sign bit to the low bit.  */
5461   if (op1 == const0_rtx && (code == LT || code == GE)
5462       && GET_MODE_CLASS (mode) == MODE_INT
5463       && (normalizep || STORE_FLAG_VALUE == 1
5464           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5465     {
5466       subtarget = target;
5467
5468       if (!target)
5469         target_mode = mode;
5470
5471       /* If the result is to be wider than OP0, it is best to convert it
5472          first.  If it is to be narrower, it is *incorrect* to convert it
5473          first.  */
5474       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5475         {
5476           op0 = convert_modes (target_mode, mode, op0, 0);
5477           mode = target_mode;
5478         }
5479
      /* TARGET cannot be used as the scratch location if the operation
         is carried out in a mode other than TARGET_MODE.  */
5480       if (target_mode != mode)
5481         subtarget = 0;
5482
5483       if (code == GE)
5484         op0 = expand_unop (mode, one_cmpl_optab, op0,
5485                            ((STORE_FLAG_VALUE == 1 || normalizep)
5486                             ? 0 : subtarget), 0);
5487
5488       if (STORE_FLAG_VALUE == 1 || normalizep)
5489         /* If we are supposed to produce a 0/1 value, we want to do
5490            a logical shift from the sign bit to the low-order bit; for
5491            a -1/0 value, we do an arithmetic shift.  */
5492         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5493                             GET_MODE_BITSIZE (mode) - 1,
5494                             subtarget, normalizep != -1);
5495
5496       if (mode != target_mode)
5497         op0 = convert_modes (target_mode, mode, op0, 0);
5498
5499       return op0;
5500     }
5501
  /* Look for a cstore pattern, starting with MODE and moving on to wider
     modes.  Only the first mode that has a pattern is tried: we return
     on success and leave the loop on failure.  For MODE_CC the optab is
     always queried with CCmode.  */
5502   mclass = GET_MODE_CLASS (mode);
5503   for (compare_mode = mode; compare_mode != VOIDmode;
5504        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5505     {
5506      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5507      icode = optab_handler (cstore_optab, optab_mode);
5508      if (icode != CODE_FOR_nothing)
5509         {
5510           do_pending_stack_adjust ();
5511           rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5512                                  unsignedp, op0, op1, normalizep, target_mode);
5513           if (tem)
5514             return tem;
5515
          /* For floating-point modes, retry with the operands exchanged
             and the swapped condition SCODE.  */
5516           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5517             {
5518               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5519                                  unsignedp, op1, op0, normalizep, target_mode);
5520               if (tem)
5521                 return tem;
5522             }
5523           break;
5524         }
5525     }
5526
5527   return 0;
5528 }
5529
5530 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5531    and storing in TARGET.  Normally return TARGET.
5532    Return 0 if that cannot be done.
5533
5534    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5535    it is VOIDmode, they cannot both be CONST_INT.
5536
5537    UNSIGNEDP is for the case where we have to widen the operands
5538    to perform the operation.  It says to use zero-extension.
5539
5540    NORMALIZEP is 1 if we should convert the result to be either zero
5541    or one.  NORMALIZEP is -1 if we should convert the result to be
5542    either zero or -1.  If NORMALIZEP is zero, the result will be left
5543    "raw" out of the scc insn.

   TARGET may be 0.  emit_store_flag_1 is tried first; the remaining
   strategies below are only attempted when BRANCH_COST is nonzero.
   Whenever one of them is abandoned, the insns it emitted are removed
   with delete_insns_since.  */
5544
5545 rtx
5546 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5547                  machine_mode mode, int unsignedp, int normalizep)
5548 {
5549   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5550   enum rtx_code rcode;
5551   rtx subtarget;
5552   rtx tem, trueval;
5553   rtx_insn *last;
5554
5555   /* If we compare constants, we shouldn't use a store-flag operation,
5556      but a constant load.  We can get there via the vanilla route that
5557      usually generates a compare-branch sequence, but will in this case
5558      fold the comparison to a constant, and thus elide the branch.  */
5559   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5560     return NULL_RTX;
5561
5562   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5563                            target_mode);
5564   if (tem)
5565     return tem;
5566
5567   /* If we reached here, we can't do this with a scc insn, however there
5568      are some comparisons that can be done in other ways.  Don't do any
5569      of these cases if branches are very cheap.  */
5570   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5571     return 0;
5572
5573   /* See what we need to return.  We can only return a 1, -1, or the
5574      sign bit.  */
5575
5576   if (normalizep == 0)
5577     {
5578       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5579         normalizep = STORE_FLAG_VALUE;
5580
5581       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5582         ;
5583       else
5584         return 0;
5585     }
5586
  /* Everything emitted after LAST can be thrown away again if a
     strategy below does not work out.  */
5587   last = get_last_insn ();
5588
5589   /* If optimizing, use different pseudo registers for each insn, instead
5590      of reusing the same pseudo.  This leads to better CSE, but slows
5591      down the compiler, since there are more pseudos.  */
5592   subtarget = (!optimize
5593                && (target_mode == mode)) ? target : NULL_RTX;
  /* TRUEVAL is the value the result has when the comparison holds.  */
5594   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5595
5596   /* For floating-point comparisons, try the reverse comparison or try
5597      changing the "orderedness" of the comparison.  */
5598   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5599     {
5600       enum rtx_code first_code;
5601       bool and_them;
5602
5603       rcode = reverse_condition_maybe_unordered (code);
5604       if (can_compare_p (rcode, mode, ccp_store_flag)
5605           && (code == ORDERED || code == UNORDERED
5606               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5607               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5608         {
          /* WANT_ADD is set when the raw flag value and the requested
             normalized value are 1 and -1 (in either order); the
             reversed result is then fixed up by adding NORMALIZEP
             instead of XORing with TRUEVAL.  */
5609           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5610                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5611
5612           /* For the reverse comparison, use either an addition or a XOR.  */
5613           if (want_add
5614               && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5615                            optimize_insn_for_speed_p ()) == 0)
5616             {
5617               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5618                                        STORE_FLAG_VALUE, target_mode);
5619               if (tem)
5620                 return expand_binop (target_mode, add_optab, tem,
5621                                      gen_int_mode (normalizep, target_mode),
5622                                      target, 0, OPTAB_WIDEN);
5623             }
5624           else if (!want_add
5625                    && rtx_cost (trueval, mode, XOR, 1,
5626                                 optimize_insn_for_speed_p ()) == 0)
5627             {
5628               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5629                                        normalizep, target_mode);
5630               if (tem)
5631                 return expand_binop (target_mode, xor_optab, tem, trueval,
5632                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5633             }
5634         }
5635
5636       delete_insns_since (last);
5637
5638       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5639       if (code == ORDERED || code == UNORDERED)
5640         return 0;
5641
      /* Split CODE into FIRST_CODE and a second comparison, which
         overwrites CODE.  AND_THEM tells how the two are combined; see
         the conditional moves below.  */
5642       and_them = split_comparison (code, mode, &first_code, &code);
5643
5644       /* If there are no NaNs, the first comparison should always fall through.
5645          Effectively change the comparison to the other one.  */
5646       if (!HONOR_NANS (mode))
5647         {
5648           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5649           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5650                                     target_mode);
5651         }
5652
5653       if (!HAVE_conditional_move)
5654         return 0;
5655
5656       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5657          conditional move.  */
5658       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5659                                normalizep, target_mode);
5660       if (tem == 0)
5661         return 0;
5662
      /* When AND_THEM, the result is TEM if the second comparison holds
         and zero otherwise; if not, it is TRUEVAL if the second
         comparison holds and TEM otherwise.  */
5663       if (and_them)
5664         tem = emit_conditional_move (target, code, op0, op1, mode,
5665                                      tem, const0_rtx, GET_MODE (tem), 0);
5666       else
5667         tem = emit_conditional_move (target, code, op0, op1, mode,
5668                                      trueval, tem, GET_MODE (tem), 0);
5669
5670       if (tem == 0)
5671         delete_insns_since (last);
5672       return tem;
5673     }
5674
5675   /* The remaining tricks only apply to integer comparisons.  */
5676
5677   if (GET_MODE_CLASS (mode) != MODE_INT)
5678     return 0;
5679
5680   /* If this is an equality comparison of integers, we can try to exclusive-or
5681      (or subtract) the two operands and use a recursive call to try the
5682      comparison with zero.  Don't do any of these cases if branches are
5683      very cheap.  */
5684
5685   if ((code == EQ || code == NE) && op1 != const0_rtx)
5686     {
5687       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5688                           OPTAB_WIDEN);
5689
5690       if (tem == 0)
5691         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5692                             OPTAB_WIDEN);
5693       if (tem != 0)
5694         tem = emit_store_flag (target, code, tem, const0_rtx,
5695                                mode, unsignedp, normalizep);
5696       if (tem != 0)
5697         return tem;
5698
5699       delete_insns_since (last);
5700     }
5701
5702   /* For integer comparisons, try the reverse comparison.  However, for
5703      small X and if we'd have anyway to extend, implementing "X != 0"
5704      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5705   rcode = reverse_condition (code);
5706   if (can_compare_p (rcode, mode, ccp_store_flag)
5707       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5708             && code == NE
5709             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5710             && op1 == const0_rtx))
5711     {
5712       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5713                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5714
5715       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5716       if (want_add
5717           && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5718                        optimize_insn_for_speed_p ()) == 0)
5719         {
5720           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5721                                    STORE_FLAG_VALUE, target_mode);
5722           if (tem != 0)
5723             tem = expand_binop (target_mode, add_optab, tem,
5724                                 gen_int_mode (normalizep, target_mode),
5725                                 target, 0, OPTAB_WIDEN);
5726         }
5727       else if (!want_add
5728                && rtx_cost (trueval, mode, XOR, 1,
5729                             optimize_insn_for_speed_p ()) == 0)
5730         {
5731           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5732                                    normalizep, target_mode);
5733           if (tem != 0)
5734             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5735                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5736         }
5737
5738       if (tem != 0)
5739         return tem;
5740       delete_insns_since (last);
5741     }
5742
5743   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5744      the constant zero.  Reject all other comparisons at this point.  Only
5745      do LE and GT if branches are expensive since they are expensive on
5746      2-operand machines.  */
5747
5748   if (op1 != const0_rtx
5749       || (code != EQ && code != NE
5750           && (BRANCH_COST (optimize_insn_for_speed_p (),
5751                            false) <= 1 || (code != LE && code != GT))))
5752     return 0;
5753
5754   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5755      do the necessary operation below.  */
5756
5757   tem = 0;
5758
5759   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5760      the sign bit set.  */
5761
5762   if (code == LE)
5763     {
5764       /* This is destructive, so SUBTARGET can't be OP0.  */
5765       if (rtx_equal_p (subtarget, op0))
5766         subtarget = 0;
5767
5768       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5769                           OPTAB_WIDEN);
5770       if (tem)
5771         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5772                             OPTAB_WIDEN);
5773     }
5774
5775   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5776      number of bits in the mode of OP0, minus one.  */
5777
5778   if (code == GT)
5779     {
5780       if (rtx_equal_p (subtarget, op0))
5781         subtarget = 0;
5782
5783       tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5784                                 GET_MODE_BITSIZE (mode) - 1,
5785                                 subtarget, 0);
5786       if (tem)
5787         tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5788                             OPTAB_WIDEN);
5789     }
5790
5791   if (code == EQ || code == NE)
5792     {
5793       /* For EQ or NE, one way to do the comparison is to apply an operation
5794          that converts the operand into a positive number if it is nonzero
5795          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5796          for NE we negate.  This puts the result in the sign bit.  Then we
5797          normalize with a shift, if needed.
5798
5799          Two operations that can do the above actions are ABS and FFS, so try
5800          them.  If that doesn't work, and MODE is smaller than a full word,
5801          we can use zero-extension to the wider mode (an unsigned conversion)
5802          as the operation.  */
5803
5804       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5805          that is compensated by the subsequent overflow when subtracting
5806          one / negating.  */
5807
5808       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5809         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5810       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5811         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5812       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5813         {
5814           tem = convert_modes (word_mode, mode, op0, 1);
          /* The rest of the computation, including the final shift, is
             now done in word_mode.  */
5815           mode = word_mode;
5816         }
5817
5818       if (tem != 0)
5819         {
5820           if (code == EQ)
5821             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5822                                 0, OPTAB_WIDEN);
5823           else
5824             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5825         }
5826
5827       /* If we couldn't do it that way, for NE we can "or" the two's complement
5828          of the value with itself.  For EQ, we take the one's complement of
5829          that "or", which is an extra insn, so we only handle EQ if branches
5830          are expensive.  */
5831
5832       if (tem == 0
5833           && (code == NE
5834               || BRANCH_COST (optimize_insn_for_speed_p (),
5835                               false) > 1))
5836         {
5837           if (rtx_equal_p (subtarget, op0))
5838             subtarget = 0;
5839
5840           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5841           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5842                               OPTAB_WIDEN);
5843
5844           if (tem && code == EQ)
5845             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5846         }
5847     }
5848
  /* The answer is now in the sign bit of TEM.  If a normalized value was
     requested, shift it down to the low-order bit; the shift is logical
     for NORMALIZEP == 1 and arithmetic otherwise.  */
5849   if (tem && normalizep)
5850     tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5851                               GET_MODE_BITSIZE (mode) - 1,
5852                               subtarget, normalizep == 1);
5853
5854   if (tem)
5855     {
      /* With no TARGET, return TEM as it is.  Otherwise convert TEM into
         TARGET if its mode differs from TARGET_MODE, or copy it into
         TARGET when SUBTARGET is 0.  */
5856       if (!target)
5857         ;
5858       else if (GET_MODE (tem) != target_mode)
5859         {
5860           convert_move (target, tem, 0);
5861           tem = target;
5862         }
5863       else if (!subtarget)
5864         {
5865           emit_move_insn (target, tem);
5866           tem = target;
5867         }
5868     }
5869   else
5870     delete_insns_since (last);
5871
5872   return tem;
5873 }
5874
5875 /* Like emit_store_flag, but always succeeds.

   If emit_store_flag cannot do the job, fall back to a sequence that
   stores one value, compares and conditionally jumps over a second
   store, and then stores the other value.  When TARGET is 0 a new
   word_mode pseudo is used.  With NORMALIZEP == 0 the value stored for a
   true comparison is const1_rtx.

   The rtx returned is not necessarily the TARGET passed in: a fresh
   pseudo of the same mode is used and returned when TARGET is not a
   register or is mentioned in OP0 or OP1.  */
5876
5877 rtx
5878 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5879                        machine_mode mode, int unsignedp, int normalizep)
5880 {
5881   rtx tem;
5882   rtx_code_label *label;
5883   rtx trueval, falseval;
5884
5885   /* First see if emit_store_flag can do the job.  */
5886   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5887   if (tem != 0)
5888     return tem;
5889
5890   if (!target)
5891     target = gen_reg_rtx (word_mode);
5892
5893   /* If this failed, we have to do this with set/compare/jump/set code.
5894      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5895   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5896   if (code == NE
5897       && GET_MODE_CLASS (mode) == MODE_INT
5898       && REG_P (target)
5899       && op0 == target
5900       && op1 == const0_rtx)
5901     {
      /* TARGET already holds zero when the comparison is false, so jump
         over the store of TRUEVAL in that case.  */
5902       label = gen_label_rtx ();
5903       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5904                                NULL_RTX, NULL, label, -1);
5905       emit_move_insn (target, trueval);
5906       emit_label (label);
5907       return target;
5908     }
5909
  /* TARGET is set before OP0 and OP1 are compared below, so switch to a
     fresh pseudo of the same mode if TARGET is not a register or is
     mentioned in either operand.  */
5910   if (!REG_P (target)
5911       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5912     target = gen_reg_rtx (GET_MODE (target));
5913
5914   /* Jump in the right direction if the target cannot implement CODE
5915      but can jump on its reverse condition.  */
5916   falseval = const0_rtx;
5917   if (! can_compare_p (code, mode, ccp_jump)
5918       && (! FLOAT_MODE_P (mode)
5919           || code == ORDERED || code == UNORDERED
5920           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5921           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5922     {
5923       enum rtx_code rcode;
5924       if (FLOAT_MODE_P (mode))
5925         rcode = reverse_condition_maybe_unordered (code);
5926       else
5927         rcode = reverse_condition (code);
5928
5929       /* Canonicalize to UNORDERED for the libcall.  */
5930       if (can_compare_p (rcode, mode, ccp_jump)
5931           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5932         {
          /* Test the reversed condition instead and exchange the two
             values stored, which leaves the final result unchanged.  */
5933           falseval = trueval;
5934           trueval = const0_rtx;
5935           code = rcode;
5936         }
5937     }
5938
  /* Store TRUEVAL, then jump over the store of FALSEVAL if the
     comparison holds.  */
5939   emit_move_insn (target, trueval);
5940   label = gen_label_rtx ();
5941   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5942                            label, -1);
5943
5944   emit_move_insn (target, falseval);
5945   emit_label (label);
5946
5947   return target;
5948 }
5949 \f
5950 /* Perform possibly multi-word comparison and conditional jump to LABEL
5951    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5952    now a thin wrapper around do_compare_rtx_and_jump.

   The signedness of the comparison is derived from OP itself.  LABEL is
   passed as the last label argument of do_compare_rtx_and_jump, with
   NULL_RTX and NULL for the arguments before it and -1 as the final
   argument.
   NOTE(review): the -1 presumably means "no probability known";
   confirm against do_compare_rtx_and_jump.  */
5953
5954 static void
5955 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5956                  rtx_code_label *label)
5957 {
  /* The comparison is unsigned exactly when OP is one of the unsigned
     comparison codes.  */
5958   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5959   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
5960                            NULL, label, -1);
5961 }