gcc/expmed.c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2016 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "predict.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "optabs.h"
34 #include "emit-rtl.h"
35 #include "diagnostic-core.h"
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "dojump.h"
39 #include "explow.h"
40 #include "expr.h"
41 #include "langhooks.h"
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 rtx, bool);
53 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 rtx, bool);
56 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 unsigned HOST_WIDE_INT,
59 unsigned HOST_WIDE_INT,
60 rtx, bool);
61 static rtx extract_fixed_bit_field (machine_mode, rtx,
62 unsigned HOST_WIDE_INT,
63 unsigned HOST_WIDE_INT, rtx, int, bool);
64 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
65 unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT, rtx, int, bool);
67 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
68 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
69 unsigned HOST_WIDE_INT, int, bool);
70 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
71 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
72 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
74 /* Return a constant integer mask value of mode MODE with BITSIZE ones
75 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
76 The mask is truncated if necessary to the width of mode MODE. The
77 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
79 static inline rtx
80 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
82 return immed_wide_int_const
83 (wi::shifted_mask (bitpos, bitsize, complement,
84 GET_MODE_PRECISION (mode)), mode);
 87 /* Test whether a value is zero or a power of two. */
88 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
89 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
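
/* Illustrative sketch, not part of this file: the shifted-mask and
   power-of-two-or-zero computations that mask_rtx and
   EXACT_POWER_OF_2_OR_ZERO_P express on RTL/HOST_WIDE_INT values, shown
   here on a plain 64-bit unsigned integer.  The function names and the
   <stdint.h>/<stdbool.h> includes are assumptions made for the example
   only; WIDTH plays the role of GET_MODE_PRECISION (mode).  */
#include <stdint.h>
#include <stdbool.h>

static uint64_t
shifted_mask_example (int bitpos, int bitsize, bool complement, int width)
{
  /* WIDTH ones, i.e. the whole mode; used to truncate the result.  */
  uint64_t full = width >= 64 ? ~UINT64_C (0) : (UINT64_C (1) << width) - 1;
  /* BITSIZE ones in the low bits...  */
  uint64_t ones = bitsize >= 64 ? ~UINT64_C (0) : (UINT64_C (1) << bitsize) - 1;
  /* ...followed by BITPOS zeros, truncated to the mode width.  */
  uint64_t mask = (ones << bitpos) & full;
  return complement ? mask ^ full : mask;
}

static bool
exact_power_of_2_or_zero_example (uint64_t x)
{
  /* x & (x - 1) clears the lowest set bit, so the result is zero
     exactly when at most one bit of x is set.  */
  return (x & (x - 1)) == 0;
}
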
91 struct init_expmed_rtl
93 rtx reg;
94 rtx plus;
95 rtx neg;
96 rtx mult;
97 rtx sdiv;
98 rtx udiv;
99 rtx sdiv_32;
100 rtx smod_32;
101 rtx wide_mult;
102 rtx wide_lshr;
103 rtx wide_trunc;
104 rtx shift;
105 rtx shift_mult;
106 rtx shift_add;
107 rtx shift_sub0;
108 rtx shift_sub1;
109 rtx zext;
110 rtx trunc;
112 rtx pow2[MAX_BITS_PER_WORD];
113 rtx cint[MAX_BITS_PER_WORD];
116 static void
117 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
118 machine_mode from_mode, bool speed)
120 int to_size, from_size;
121 rtx which;
123 to_size = GET_MODE_PRECISION (to_mode);
124 from_size = GET_MODE_PRECISION (from_mode);
 126 /* Most partial integers have a precision less than that of the "full"
 127 integer mode they require for storage. If one does not, reduce the
 128 bit size by one for comparison purposes here. */
130 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
131 && pow2p_hwi (to_size))
132 to_size --;
133 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
134 && pow2p_hwi (from_size))
135 from_size --;
137 /* Assume cost of zero-extend and sign-extend is the same. */
138 which = (to_size < from_size ? all->trunc : all->zext);
140 PUT_MODE (all->reg, from_mode);
141 set_convert_cost (to_mode, from_mode, speed,
142 set_src_cost (which, to_mode, speed));
145 static void
146 init_expmed_one_mode (struct init_expmed_rtl *all,
147 machine_mode mode, int speed)
149 int m, n, mode_bitsize;
150 machine_mode mode_from;
152 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
154 PUT_MODE (all->reg, mode);
155 PUT_MODE (all->plus, mode);
156 PUT_MODE (all->neg, mode);
157 PUT_MODE (all->mult, mode);
158 PUT_MODE (all->sdiv, mode);
159 PUT_MODE (all->udiv, mode);
160 PUT_MODE (all->sdiv_32, mode);
161 PUT_MODE (all->smod_32, mode);
162 PUT_MODE (all->wide_trunc, mode);
163 PUT_MODE (all->shift, mode);
164 PUT_MODE (all->shift_mult, mode);
165 PUT_MODE (all->shift_add, mode);
166 PUT_MODE (all->shift_sub0, mode);
167 PUT_MODE (all->shift_sub1, mode);
168 PUT_MODE (all->zext, mode);
169 PUT_MODE (all->trunc, mode);
171 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
172 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
173 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
174 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
175 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
177 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
178 <= 2 * add_cost (speed, mode)));
179 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
180 <= 4 * add_cost (speed, mode)));
182 set_shift_cost (speed, mode, 0, 0);
184 int cost = add_cost (speed, mode);
185 set_shiftadd_cost (speed, mode, 0, cost);
186 set_shiftsub0_cost (speed, mode, 0, cost);
187 set_shiftsub1_cost (speed, mode, 0, cost);
190 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
191 for (m = 1; m < n; m++)
193 XEXP (all->shift, 1) = all->cint[m];
194 XEXP (all->shift_mult, 1) = all->pow2[m];
196 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
197 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
198 speed));
199 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
200 speed));
201 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
202 speed));
205 if (SCALAR_INT_MODE_P (mode))
207 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
208 mode_from = (machine_mode)(mode_from + 1))
209 init_expmed_one_conv (all, mode, mode_from, speed);
211 if (GET_MODE_CLASS (mode) == MODE_INT)
213 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
214 if (wider_mode != VOIDmode)
216 PUT_MODE (all->zext, wider_mode);
217 PUT_MODE (all->wide_mult, wider_mode);
218 PUT_MODE (all->wide_lshr, wider_mode);
219 XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
221 set_mul_widen_cost (speed, wider_mode,
222 set_src_cost (all->wide_mult, wider_mode, speed));
223 set_mul_highpart_cost (speed, mode,
224 set_src_cost (all->wide_trunc, mode, speed));
229 void
230 init_expmed (void)
232 struct init_expmed_rtl all;
233 machine_mode mode = QImode;
234 int m, speed;
236 memset (&all, 0, sizeof all);
237 for (m = 1; m < MAX_BITS_PER_WORD; m++)
239 all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
240 all.cint[m] = GEN_INT (m);
243 /* Avoid using hard regs in ways which may be unsupported. */
244 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
245 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
246 all.neg = gen_rtx_NEG (mode, all.reg);
247 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
248 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
249 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
250 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
251 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
252 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
253 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
254 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
255 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
256 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
257 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
258 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
259 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
260 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
261 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
263 for (speed = 0; speed < 2; speed++)
265 crtl->maybe_hot_insn_p = speed;
266 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
268 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
269 mode = (machine_mode)(mode + 1))
270 init_expmed_one_mode (&all, mode, speed);
272 if (MIN_MODE_PARTIAL_INT != VOIDmode)
273 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
274 mode = (machine_mode)(mode + 1))
275 init_expmed_one_mode (&all, mode, speed);
277 if (MIN_MODE_VECTOR_INT != VOIDmode)
278 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
279 mode = (machine_mode)(mode + 1))
280 init_expmed_one_mode (&all, mode, speed);
283 if (alg_hash_used_p ())
285 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
286 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
288 else
289 set_alg_hash_used_p (true);
290 default_rtl_profile ();
292 ggc_free (all.trunc);
293 ggc_free (all.shift_sub1);
294 ggc_free (all.shift_sub0);
295 ggc_free (all.shift_add);
296 ggc_free (all.shift_mult);
297 ggc_free (all.shift);
298 ggc_free (all.wide_trunc);
299 ggc_free (all.wide_lshr);
300 ggc_free (all.wide_mult);
301 ggc_free (all.zext);
302 ggc_free (all.smod_32);
303 ggc_free (all.sdiv_32);
304 ggc_free (all.udiv);
305 ggc_free (all.sdiv);
306 ggc_free (all.mult);
307 ggc_free (all.neg);
308 ggc_free (all.plus);
309 ggc_free (all.reg);
312 /* Return an rtx representing minus the value of X.
313 MODE is the intended mode of the result,
314 useful if X is a CONST_INT. */
317 negate_rtx (machine_mode mode, rtx x)
319 rtx result = simplify_unary_operation (NEG, mode, x, mode);
321 if (result == 0)
322 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
324 return result;
327 /* Whether reverse storage order is supported on the target. */
328 static int reverse_storage_order_supported = -1;
330 /* Check whether reverse storage order is supported on the target. */
332 static void
333 check_reverse_storage_order_support (void)
335 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
337 reverse_storage_order_supported = 0;
338 sorry ("reverse scalar storage order");
340 else
341 reverse_storage_order_supported = 1;
344 /* Whether reverse FP storage order is supported on the target. */
345 static int reverse_float_storage_order_supported = -1;
347 /* Check whether reverse FP storage order is supported on the target. */
349 static void
350 check_reverse_float_storage_order_support (void)
352 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
354 reverse_float_storage_order_supported = 0;
355 sorry ("reverse floating-point scalar storage order");
357 else
358 reverse_float_storage_order_supported = 1;
361 /* Return an rtx representing value of X with reverse storage order.
362 MODE is the intended mode of the result,
363 useful if X is a CONST_INT. */
366 flip_storage_order (enum machine_mode mode, rtx x)
368 enum machine_mode int_mode;
369 rtx result;
371 if (mode == QImode)
372 return x;
374 if (COMPLEX_MODE_P (mode))
376 rtx real = read_complex_part (x, false);
377 rtx imag = read_complex_part (x, true);
379 real = flip_storage_order (GET_MODE_INNER (mode), real);
380 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
382 return gen_rtx_CONCAT (mode, real, imag);
385 if (__builtin_expect (reverse_storage_order_supported < 0, 0))
386 check_reverse_storage_order_support ();
388 if (SCALAR_INT_MODE_P (mode))
389 int_mode = mode;
390 else
392 if (FLOAT_MODE_P (mode)
393 && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
394 check_reverse_float_storage_order_support ();
396 int_mode = mode_for_size (GET_MODE_PRECISION (mode), MODE_INT, 0);
397 if (int_mode == BLKmode)
399 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
400 return x;
402 x = gen_lowpart (int_mode, x);
405 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
406 if (result == 0)
407 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
409 if (int_mode != mode)
410 result = gen_lowpart (mode, result);
412 return result;
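
/* Illustrative sketch, not part of this file: the effect of
   flip_storage_order on a 32-bit scalar, written in plain C.  The RTL
   code above reinterprets the value in an integer mode of the same
   width, byte-swaps it (BSWAP / bswap_optab) and converts back; here
   the reinterpretation is done with memcpy.  The function names and
   the <stdint.h>/<string.h> includes are assumptions for the example.  */
#include <stdint.h>
#include <string.h>

static uint32_t
bswap32_example (uint32_t x)
{
  return ((x & UINT32_C (0x000000ff)) << 24)
         | ((x & UINT32_C (0x0000ff00)) << 8)
         | ((x & UINT32_C (0x00ff0000)) >> 8)
         | ((x & UINT32_C (0xff000000)) >> 24);
}

static float
flip_float_storage_order_example (float f)
{
  uint32_t bits;
  memcpy (&bits, &f, sizeof bits);   /* like gen_lowpart (SImode, x)      */
  bits = bswap32_example (bits);     /* like the BSWAP operation          */
  memcpy (&f, &bits, sizeof f);      /* like gen_lowpart (SFmode, result) */
  return f;
}
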
415 /* Adjust bitfield memory MEM so that it points to the first unit of mode
416 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
417 If MODE is BLKmode, return a reference to every byte in the bitfield.
418 Set *NEW_BITNUM to the bit position of the field within the new memory. */
420 static rtx
421 narrow_bit_field_mem (rtx mem, machine_mode mode,
422 unsigned HOST_WIDE_INT bitsize,
423 unsigned HOST_WIDE_INT bitnum,
424 unsigned HOST_WIDE_INT *new_bitnum)
426 if (mode == BLKmode)
428 *new_bitnum = bitnum % BITS_PER_UNIT;
429 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
430 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
431 / BITS_PER_UNIT);
432 return adjust_bitfield_address_size (mem, mode, offset, size);
434 else
436 unsigned int unit = GET_MODE_BITSIZE (mode);
437 *new_bitnum = bitnum % unit;
438 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
439 return adjust_bitfield_address (mem, mode, offset);
443 /* The caller wants to perform insertion or extraction PATTERN on a
444 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
445 BITREGION_START and BITREGION_END are as for store_bit_field
446 and FIELDMODE is the natural mode of the field.
448 Search for a mode that is compatible with the memory access
449 restrictions and (where applicable) with a register insertion or
450 extraction. Return the new memory on success, storing the adjusted
451 bit position in *NEW_BITNUM. Return null otherwise. */
453 static rtx
454 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
455 rtx op0, HOST_WIDE_INT bitsize,
456 HOST_WIDE_INT bitnum,
457 unsigned HOST_WIDE_INT bitregion_start,
458 unsigned HOST_WIDE_INT bitregion_end,
459 machine_mode fieldmode,
460 unsigned HOST_WIDE_INT *new_bitnum)
462 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
463 bitregion_end, MEM_ALIGN (op0),
464 MEM_VOLATILE_P (op0));
465 machine_mode best_mode;
466 if (iter.next_mode (&best_mode))
468 /* We can use a memory in BEST_MODE. See whether this is true for
469 any wider modes. All other things being equal, we prefer to
470 use the widest mode possible because it tends to expose more
471 CSE opportunities. */
472 if (!iter.prefer_smaller_modes ())
474 /* Limit the search to the mode required by the corresponding
475 register insertion or extraction instruction, if any. */
476 machine_mode limit_mode = word_mode;
477 extraction_insn insn;
478 if (get_best_reg_extraction_insn (&insn, pattern,
479 GET_MODE_BITSIZE (best_mode),
480 fieldmode))
481 limit_mode = insn.field_mode;
483 machine_mode wider_mode;
484 while (iter.next_mode (&wider_mode)
485 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
486 best_mode = wider_mode;
488 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
489 new_bitnum);
491 return NULL_RTX;
494 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
495 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
496 offset is then BITNUM / BITS_PER_UNIT. */
498 static bool
499 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
500 unsigned HOST_WIDE_INT bitsize,
501 machine_mode struct_mode)
503 if (BYTES_BIG_ENDIAN)
504 return (bitnum % BITS_PER_UNIT == 0
505 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
506 || (bitnum + bitsize) % BITS_PER_WORD == 0));
507 else
508 return bitnum % BITS_PER_WORD == 0;
511 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
512 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
513 Return false if the access would touch memory outside the range
514 BITREGION_START to BITREGION_END for conformance to the C++ memory
515 model. */
517 static bool
518 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
519 unsigned HOST_WIDE_INT bitnum,
520 machine_mode fieldmode,
521 unsigned HOST_WIDE_INT bitregion_start,
522 unsigned HOST_WIDE_INT bitregion_end)
524 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
526 /* -fstrict-volatile-bitfields must be enabled and we must have a
527 volatile MEM. */
528 if (!MEM_P (op0)
529 || !MEM_VOLATILE_P (op0)
530 || flag_strict_volatile_bitfields <= 0)
531 return false;
533 /* Non-integral modes likely only happen with packed structures.
534 Punt. */
535 if (!SCALAR_INT_MODE_P (fieldmode))
536 return false;
538 /* The bit size must not be larger than the field mode, and
539 the field mode must not be larger than a word. */
540 if (bitsize > modesize || modesize > BITS_PER_WORD)
541 return false;
543 /* Check for cases of unaligned fields that must be split. */
544 if (bitnum % modesize + bitsize > modesize)
545 return false;
547 /* The memory must be sufficiently aligned for a MODESIZE access.
 548 This condition guarantees that the memory access will not
549 touch anything after the end of the structure. */
550 if (MEM_ALIGN (op0) < modesize)
551 return false;
553 /* Check for cases where the C++ memory model applies. */
554 if (bitregion_end != 0
555 && (bitnum - bitnum % modesize < bitregion_start
556 || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
557 return false;
559 return true;
562 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
563 bit number BITNUM can be treated as a simple value of mode MODE. */
565 static bool
566 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
567 unsigned HOST_WIDE_INT bitnum, machine_mode mode)
569 return (MEM_P (op0)
570 && bitnum % BITS_PER_UNIT == 0
571 && bitsize == GET_MODE_BITSIZE (mode)
572 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
573 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
574 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
577 /* Try to use instruction INSV to store VALUE into a field of OP0.
578 BITSIZE and BITNUM are as for store_bit_field. */
580 static bool
581 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
582 unsigned HOST_WIDE_INT bitsize,
583 unsigned HOST_WIDE_INT bitnum,
584 rtx value)
586 struct expand_operand ops[4];
587 rtx value1;
588 rtx xop0 = op0;
589 rtx_insn *last = get_last_insn ();
590 bool copy_back = false;
592 machine_mode op_mode = insv->field_mode;
593 unsigned int unit = GET_MODE_BITSIZE (op_mode);
594 if (bitsize == 0 || bitsize > unit)
595 return false;
597 if (MEM_P (xop0))
598 /* Get a reference to the first byte of the field. */
599 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
600 &bitnum);
601 else
603 /* Convert from counting within OP0 to counting in OP_MODE. */
604 if (BYTES_BIG_ENDIAN)
605 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
607 /* If xop0 is a register, we need it in OP_MODE
608 to make it acceptable to the format of insv. */
609 if (GET_CODE (xop0) == SUBREG)
610 /* We can't just change the mode, because this might clobber op0,
611 and we will need the original value of op0 if insv fails. */
612 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
613 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
614 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
617 /* If the destination is a paradoxical subreg such that we need a
618 truncate to the inner mode, perform the insertion on a temporary and
619 truncate the result to the original destination. Note that we can't
620 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
621 X) 0)) is (reg:N X). */
622 if (GET_CODE (xop0) == SUBREG
623 && REG_P (SUBREG_REG (xop0))
624 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
625 op_mode))
627 rtx tem = gen_reg_rtx (op_mode);
628 emit_move_insn (tem, xop0);
629 xop0 = tem;
630 copy_back = true;
 633 /* There is a similar overflow check at the start of store_bit_field_1,
 634 but it only handles the case where the field lies completely outside
 635 the register. The field can also lie partially in the register, and
 636 then we need to adjust BITSIZE for the partial overflow. Without this
 637 fix, pr48335-2.c would break on big-endian targets that have a
 638 bit-insert instruction, such as arm and aarch64. */
640 if (bitsize + bitnum > unit && bitnum < unit)
642 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
643 "destination object, data truncated into %wu-bit",
644 bitsize, unit - bitnum);
645 bitsize = unit - bitnum;
648 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
649 "backwards" from the size of the unit we are inserting into.
650 Otherwise, we count bits from the most significant on a
651 BYTES/BITS_BIG_ENDIAN machine. */
653 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
654 bitnum = unit - bitsize - bitnum;
656 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
657 value1 = value;
658 if (GET_MODE (value) != op_mode)
660 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
662 rtx tmp;
663 /* Optimization: Don't bother really extending VALUE
664 if it has all the bits we will actually use. However,
665 if we must narrow it, be sure we do it correctly. */
667 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
669 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
670 if (! tmp)
671 tmp = simplify_gen_subreg (op_mode,
672 force_reg (GET_MODE (value),
673 value1),
674 GET_MODE (value), 0);
676 else
678 tmp = gen_lowpart_if_possible (op_mode, value1);
679 if (! tmp)
680 tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value),
681 value1));
683 value1 = tmp;
685 else if (CONST_INT_P (value))
686 value1 = gen_int_mode (INTVAL (value), op_mode);
687 else
688 /* Parse phase is supposed to make VALUE's data type
689 match that of the component reference, which is a type
690 at least as wide as the field; so VALUE should have
691 a mode that corresponds to that type. */
692 gcc_assert (CONSTANT_P (value));
695 create_fixed_operand (&ops[0], xop0);
696 create_integer_operand (&ops[1], bitsize);
697 create_integer_operand (&ops[2], bitnum);
698 create_input_operand (&ops[3], value1, op_mode);
699 if (maybe_expand_insn (insv->icode, 4, ops))
701 if (copy_back)
702 convert_move (op0, xop0, true);
703 return true;
705 delete_insns_since (last);
706 return false;
709 /* A subroutine of store_bit_field, with the same arguments. Return true
710 if the operation could be implemented.
712 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
713 no other way of implementing the operation. If FALLBACK_P is false,
714 return false instead. */
716 static bool
717 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
718 unsigned HOST_WIDE_INT bitnum,
719 unsigned HOST_WIDE_INT bitregion_start,
720 unsigned HOST_WIDE_INT bitregion_end,
721 machine_mode fieldmode,
722 rtx value, bool reverse, bool fallback_p)
724 rtx op0 = str_rtx;
725 rtx orig_value;
727 while (GET_CODE (op0) == SUBREG)
729 /* The following line once was done only if WORDS_BIG_ENDIAN,
730 but I think that is a mistake. WORDS_BIG_ENDIAN is
731 meaningful at a much higher level; when structures are copied
732 between memory and regs, the higher-numbered regs
733 always get higher addresses. */
734 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
735 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
736 int byte_offset = 0;
738 /* Paradoxical subregs need special handling on big-endian machines. */
739 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
741 int difference = inner_mode_size - outer_mode_size;
743 if (WORDS_BIG_ENDIAN)
744 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
745 if (BYTES_BIG_ENDIAN)
746 byte_offset += difference % UNITS_PER_WORD;
748 else
749 byte_offset = SUBREG_BYTE (op0);
751 bitnum += byte_offset * BITS_PER_UNIT;
752 op0 = SUBREG_REG (op0);
755 /* No action is needed if the target is a register and if the field
756 lies completely outside that register. This can occur if the source
757 code contains an out-of-bounds access to a small array. */
758 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
759 return true;
761 /* Use vec_set patterns for inserting parts of vectors whenever
762 available. */
763 if (VECTOR_MODE_P (GET_MODE (op0))
764 && !MEM_P (op0)
765 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
766 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
767 && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
768 && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
770 struct expand_operand ops[3];
771 machine_mode outermode = GET_MODE (op0);
772 machine_mode innermode = GET_MODE_INNER (outermode);
773 enum insn_code icode = optab_handler (vec_set_optab, outermode);
774 int pos = bitnum / GET_MODE_BITSIZE (innermode);
776 create_fixed_operand (&ops[0], op0);
777 create_input_operand (&ops[1], value, innermode);
778 create_integer_operand (&ops[2], pos);
779 if (maybe_expand_insn (icode, 3, ops))
780 return true;
783 /* If the target is a register, overwriting the entire object, or storing
784 a full-word or multi-word field can be done with just a SUBREG. */
785 if (!MEM_P (op0)
786 && bitsize == GET_MODE_BITSIZE (fieldmode)
787 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
788 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
790 /* Use the subreg machinery either to narrow OP0 to the required
791 words or to cope with mode punning between equal-sized modes.
792 In the latter case, use subreg on the rhs side, not lhs. */
793 rtx sub;
795 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
797 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
798 if (sub)
800 if (reverse)
801 sub = flip_storage_order (GET_MODE (op0), sub);
802 emit_move_insn (op0, sub);
803 return true;
806 else
808 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
809 bitnum / BITS_PER_UNIT);
810 if (sub)
812 if (reverse)
813 value = flip_storage_order (fieldmode, value);
814 emit_move_insn (sub, value);
815 return true;
820 /* If the target is memory, storing any naturally aligned field can be
821 done with a simple store. For targets that support fast unaligned
822 memory, any naturally sized, unit aligned field can be done directly. */
823 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
825 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
826 if (reverse)
827 value = flip_storage_order (fieldmode, value);
828 emit_move_insn (op0, value);
829 return true;
832 /* Make sure we are playing with integral modes. Pun with subregs
833 if we aren't. This must come after the entire register case above,
834 since that case is valid for any mode. The following cases are only
835 valid for integral modes. */
837 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
838 if (imode != GET_MODE (op0))
840 if (MEM_P (op0))
841 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
842 else
844 gcc_assert (imode != BLKmode);
845 op0 = gen_lowpart (imode, op0);
850 /* Storing an lsb-aligned field in a register
851 can be done with a movstrict instruction. */
853 if (!MEM_P (op0)
854 && !reverse
855 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
856 && bitsize == GET_MODE_BITSIZE (fieldmode)
857 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
859 struct expand_operand ops[2];
860 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
861 rtx arg0 = op0;
862 unsigned HOST_WIDE_INT subreg_off;
864 if (GET_CODE (arg0) == SUBREG)
866 /* Else we've got some float mode source being extracted into
867 a different float mode destination -- this combination of
868 subregs results in Severe Tire Damage. */
869 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
870 || GET_MODE_CLASS (fieldmode) == MODE_INT
871 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
872 arg0 = SUBREG_REG (arg0);
875 subreg_off = bitnum / BITS_PER_UNIT;
876 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
878 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
880 create_fixed_operand (&ops[0], arg0);
881 /* Shrink the source operand to FIELDMODE. */
882 create_convert_operand_to (&ops[1], value, fieldmode, false);
883 if (maybe_expand_insn (icode, 2, ops))
884 return true;
888 /* Handle fields bigger than a word. */
890 if (bitsize > BITS_PER_WORD)
892 /* Here we transfer the words of the field
893 in the order least significant first.
894 This is because the most significant word is the one which may
895 be less than full.
896 However, only do that if the value is not BLKmode. */
898 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
899 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
900 unsigned int i;
901 rtx_insn *last;
903 /* This is the mode we must force value to, so that there will be enough
904 subwords to extract. Note that fieldmode will often (always?) be
905 VOIDmode, because that is what store_field uses to indicate that this
906 is a bit field, but passing VOIDmode to operand_subword_force
907 is not allowed. */
908 fieldmode = GET_MODE (value);
909 if (fieldmode == VOIDmode)
910 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
912 last = get_last_insn ();
913 for (i = 0; i < nwords; i++)
915 /* If I is 0, use the low-order word in both field and target;
916 if I is 1, use the next to lowest word; and so on. */
917 unsigned int wordnum = (backwards
918 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
919 - i - 1
920 : i);
921 unsigned int bit_offset = (backwards ^ reverse
922 ? MAX ((int) bitsize - ((int) i + 1)
923 * BITS_PER_WORD,
925 : (int) i * BITS_PER_WORD);
926 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
927 unsigned HOST_WIDE_INT new_bitsize =
928 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
930 /* If the remaining chunk doesn't have full wordsize we have
931 to make sure that for big-endian machines the higher order
932 bits are used. */
933 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
934 value_word = simplify_expand_binop (word_mode, lshr_optab,
935 value_word,
936 GEN_INT (BITS_PER_WORD
937 - new_bitsize),
938 NULL_RTX, true,
939 OPTAB_LIB_WIDEN);
941 if (!store_bit_field_1 (op0, new_bitsize,
942 bitnum + bit_offset,
943 bitregion_start, bitregion_end,
944 word_mode,
945 value_word, reverse, fallback_p))
947 delete_insns_since (last);
948 return false;
951 return true;
954 /* If VALUE has a floating-point or complex mode, access it as an
955 integer of the corresponding size. This can occur on a machine
956 with 64 bit registers that uses SFmode for float. It can also
957 occur for unaligned float or complex fields. */
958 orig_value = value;
959 if (GET_MODE (value) != VOIDmode
960 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
961 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
963 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
964 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
967 /* If OP0 is a multi-word register, narrow it to the affected word.
968 If the region spans two words, defer to store_split_bit_field. */
969 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
971 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
973 if (!fallback_p)
974 return false;
976 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
977 bitregion_end, value, reverse);
978 return true;
980 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
981 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
982 gcc_assert (op0);
983 bitnum %= BITS_PER_WORD;
986 /* From here on we can assume that the field to be stored in fits
987 within a word. If the destination is a register, it too fits
988 in a word. */
990 extraction_insn insv;
991 if (!MEM_P (op0)
992 && !reverse
993 && get_best_reg_extraction_insn (&insv, EP_insv,
994 GET_MODE_BITSIZE (GET_MODE (op0)),
995 fieldmode)
996 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
997 return true;
999 /* If OP0 is a memory, try copying it to a register and seeing if a
1000 cheap register alternative is available. */
1001 if (MEM_P (op0) && !reverse)
1003 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1004 fieldmode)
1005 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1006 return true;
1008 rtx_insn *last = get_last_insn ();
1010 /* Try loading part of OP0 into a register, inserting the bitfield
1011 into that, and then copying the result back to OP0. */
1012 unsigned HOST_WIDE_INT bitpos;
1013 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1014 bitregion_start, bitregion_end,
1015 fieldmode, &bitpos);
1016 if (xop0)
1018 rtx tempreg = copy_to_reg (xop0);
1019 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1020 bitregion_start, bitregion_end,
1021 fieldmode, orig_value, reverse, false))
1023 emit_move_insn (xop0, tempreg);
1024 return true;
1026 delete_insns_since (last);
1030 if (!fallback_p)
1031 return false;
1033 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
1034 bitregion_end, value, reverse);
1035 return true;
1038 /* Generate code to store value from rtx VALUE
1039 into a bit-field within structure STR_RTX
1040 containing BITSIZE bits starting at bit BITNUM.
1042 BITREGION_START is bitpos of the first bitfield in this region.
1043 BITREGION_END is the bitpos of the ending bitfield in this region.
1044 These two fields are 0, if the C++ memory model does not apply,
1045 or we are not interested in keeping track of bitfield regions.
1047 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1049 If REVERSE is true, the store is to be done in reverse order. */
1051 void
1052 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1053 unsigned HOST_WIDE_INT bitnum,
1054 unsigned HOST_WIDE_INT bitregion_start,
1055 unsigned HOST_WIDE_INT bitregion_end,
1056 machine_mode fieldmode,
1057 rtx value, bool reverse)
1059 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1060 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
1061 bitregion_start, bitregion_end))
1063 /* Storing of a full word can be done with a simple store.
1064 We know here that the field can be accessed with one single
1065 instruction. For targets that support unaligned memory,
1066 an unaligned access may be necessary. */
1067 if (bitsize == GET_MODE_BITSIZE (fieldmode))
1069 str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
1070 bitnum / BITS_PER_UNIT);
1071 if (reverse)
1072 value = flip_storage_order (fieldmode, value);
1073 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1074 emit_move_insn (str_rtx, value);
1076 else
1078 rtx temp;
1080 str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
1081 &bitnum);
1082 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
1083 temp = copy_to_reg (str_rtx);
1084 if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1085 fieldmode, value, reverse, true))
1086 gcc_unreachable ();
1088 emit_move_insn (str_rtx, temp);
1091 return;
1094 /* Under the C++0x memory model, we must not touch bits outside the
1095 bit region. Adjust the address to start at the beginning of the
1096 bit region. */
1097 if (MEM_P (str_rtx) && bitregion_start > 0)
1099 machine_mode bestmode;
1100 HOST_WIDE_INT offset, size;
1102 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1104 offset = bitregion_start / BITS_PER_UNIT;
1105 bitnum -= bitregion_start;
1106 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1107 bitregion_end -= bitregion_start;
1108 bitregion_start = 0;
1109 bestmode = get_best_mode (bitsize, bitnum,
1110 bitregion_start, bitregion_end,
1111 MEM_ALIGN (str_rtx), VOIDmode,
1112 MEM_VOLATILE_P (str_rtx));
1113 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1116 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1117 bitregion_start, bitregion_end,
1118 fieldmode, value, reverse, true))
1119 gcc_unreachable ();
1122 /* Use shifts and boolean operations to store VALUE into a bit field of
1123 width BITSIZE in OP0, starting at bit BITNUM.
1125 If REVERSE is true, the store is to be done in reverse order. */
1127 static void
1128 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1129 unsigned HOST_WIDE_INT bitnum,
1130 unsigned HOST_WIDE_INT bitregion_start,
1131 unsigned HOST_WIDE_INT bitregion_end,
1132 rtx value, bool reverse)
1134 /* There is a case not handled here:
1135 a structure with a known alignment of just a halfword
1136 and a field split across two aligned halfwords within the structure.
1137 Or likewise a structure with a known alignment of just a byte
1138 and a field split across two bytes.
1139 Such cases are not supposed to be able to occur. */
1141 if (MEM_P (op0))
1143 machine_mode mode = GET_MODE (op0);
1144 if (GET_MODE_BITSIZE (mode) == 0
1145 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1146 mode = word_mode;
1147 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1148 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1150 if (mode == VOIDmode)
1152 /* The only way this should occur is if the field spans word
1153 boundaries. */
1154 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1155 bitregion_end, value, reverse);
1156 return;
1159 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1162 store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
1165 /* Helper function for store_fixed_bit_field, stores
1166 the bit field always using the MODE of OP0. */
1168 static void
1169 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1170 unsigned HOST_WIDE_INT bitnum,
1171 rtx value, bool reverse)
1173 machine_mode mode;
1174 rtx temp;
1175 int all_zero = 0;
1176 int all_one = 0;
1178 mode = GET_MODE (op0);
1179 gcc_assert (SCALAR_INT_MODE_P (mode));
1181 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1182 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1184 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1185 /* BITNUM is the distance between our msb
1186 and that of the containing datum.
1187 Convert it to the distance from the lsb. */
1188 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1190 /* Now BITNUM is always the distance between our lsb
1191 and that of OP0. */
1193 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1194 we must first convert its mode to MODE. */
1196 if (CONST_INT_P (value))
1198 unsigned HOST_WIDE_INT v = UINTVAL (value);
1200 if (bitsize < HOST_BITS_PER_WIDE_INT)
1201 v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1203 if (v == 0)
1204 all_zero = 1;
1205 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1206 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1207 || (bitsize == HOST_BITS_PER_WIDE_INT
1208 && v == HOST_WIDE_INT_M1U))
1209 all_one = 1;
1211 value = lshift_value (mode, v, bitnum);
1213 else
1215 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1216 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1218 if (GET_MODE (value) != mode)
1219 value = convert_to_mode (mode, value, 1);
1221 if (must_and)
1222 value = expand_binop (mode, and_optab, value,
1223 mask_rtx (mode, 0, bitsize, 0),
1224 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1225 if (bitnum > 0)
1226 value = expand_shift (LSHIFT_EXPR, mode, value,
1227 bitnum, NULL_RTX, 1);
1230 if (reverse)
1231 value = flip_storage_order (mode, value);
1233 /* Now clear the chosen bits in OP0,
1234 except that if VALUE is -1 we need not bother. */
1235 /* We keep the intermediates in registers to allow CSE to combine
1236 consecutive bitfield assignments. */
1238 temp = force_reg (mode, op0);
1240 if (! all_one)
1242 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1243 if (reverse)
1244 mask = flip_storage_order (mode, mask);
1245 temp = expand_binop (mode, and_optab, temp, mask,
1246 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1247 temp = force_reg (mode, temp);
1250 /* Now logical-or VALUE into OP0, unless it is zero. */
1252 if (! all_zero)
1254 temp = expand_binop (mode, ior_optab, temp, value,
1255 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1256 temp = force_reg (mode, temp);
1259 if (op0 != temp)
1261 op0 = copy_rtx (op0);
1262 emit_move_insn (op0, temp);
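
/* Illustrative sketch, not part of this file: what
   store_fixed_bit_field_1 does with RTL, written for a single 64-bit
   word in plain C.  The field is cleared with an AND against the
   complemented mask and the (masked, shifted) new value is ORed in.
   BITNUM counts from the least significant bit, matching the
   little-endian case above; the names and <stdint.h> are assumptions
   for the example.  */
#include <stdint.h>

static uint64_t
store_fixed_bit_field_example (uint64_t word, unsigned bitnum,
                               unsigned bitsize, uint64_t value)
{
  uint64_t field_mask = bitsize >= 64 ? ~UINT64_C (0)
                                      : (UINT64_C (1) << bitsize) - 1;
  uint64_t clear_mask = ~(field_mask << bitnum);   /* mask_rtx (..., 1) */
  value = (value & field_mask) << bitnum;          /* lshift_value      */
  return (word & clear_mask) | value;              /* AND, then IOR     */
}
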
1266 /* Store a bit field that is split across multiple accessible memory objects.
1268 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1269 BITSIZE is the field width; BITPOS the position of its first bit
1270 (within the word).
1271 VALUE is the value to store.
1273 If REVERSE is true, the store is to be done in reverse order.
1275 This does not yet handle fields wider than BITS_PER_WORD. */
1277 static void
1278 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1279 unsigned HOST_WIDE_INT bitpos,
1280 unsigned HOST_WIDE_INT bitregion_start,
1281 unsigned HOST_WIDE_INT bitregion_end,
1282 rtx value, bool reverse)
1284 unsigned int unit, total_bits, bitsdone = 0;
1286 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1287 much at a time. */
1288 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1289 unit = BITS_PER_WORD;
1290 else
1291 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1293 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1294 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1295 again, and we will mutually recurse forever. */
1296 if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1297 unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1299 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1300 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1301 that VALUE might be a floating-point constant. */
1302 if (CONSTANT_P (value) && !CONST_INT_P (value))
1304 rtx word = gen_lowpart_common (word_mode, value);
1306 if (word && (value != word))
1307 value = word;
1308 else
1309 value = gen_lowpart_common (word_mode,
1310 force_reg (GET_MODE (value) != VOIDmode
1311 ? GET_MODE (value)
1312 : word_mode, value));
1315 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1317 while (bitsdone < bitsize)
1319 unsigned HOST_WIDE_INT thissize;
1320 unsigned HOST_WIDE_INT thispos;
1321 unsigned HOST_WIDE_INT offset;
1322 rtx part, word;
1324 offset = (bitpos + bitsdone) / unit;
1325 thispos = (bitpos + bitsdone) % unit;
 1327 /* When the region of bytes we can touch is restricted, decrease
1328 UNIT close to the end of the region as needed. If op0 is a REG
1329 or SUBREG of REG, don't do this, as there can't be data races
1330 on a register and we can expand shorter code in some cases. */
1331 if (bitregion_end
1332 && unit > BITS_PER_UNIT
1333 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1334 && !REG_P (op0)
1335 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1337 unit = unit / 2;
1338 continue;
1341 /* THISSIZE must not overrun a word boundary. Otherwise,
1342 store_fixed_bit_field will call us again, and we will mutually
1343 recurse forever. */
1344 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1345 thissize = MIN (thissize, unit - thispos);
1347 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1349 /* Fetch successively less significant portions. */
1350 if (CONST_INT_P (value))
1351 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1352 >> (bitsize - bitsdone - thissize))
1353 & ((HOST_WIDE_INT_1 << thissize) - 1));
1354 /* Likewise, but the source is little-endian. */
1355 else if (reverse)
1356 part = extract_fixed_bit_field (word_mode, value, thissize,
1357 bitsize - bitsdone - thissize,
1358 NULL_RTX, 1, false);
1359 else
1361 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1362 /* The args are chosen so that the last part includes the
1363 lsb. Give extract_bit_field the value it needs (with
1364 endianness compensation) to fetch the piece we want. */
1365 part = extract_fixed_bit_field (word_mode, value, thissize,
1366 total_bits - bitsize + bitsdone,
1367 NULL_RTX, 1, false);
1370 else
1372 /* Fetch successively more significant portions. */
1373 if (CONST_INT_P (value))
1374 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1375 >> bitsdone)
1376 & ((HOST_WIDE_INT_1 << thissize) - 1));
1377 /* Likewise, but the source is big-endian. */
1378 else if (reverse)
1379 part = extract_fixed_bit_field (word_mode, value, thissize,
1380 total_bits - bitsdone - thissize,
1381 NULL_RTX, 1, false);
1382 else
1383 part = extract_fixed_bit_field (word_mode, value, thissize,
1384 bitsdone, NULL_RTX, 1, false);
1387 /* If OP0 is a register, then handle OFFSET here. */
1388 if (SUBREG_P (op0) || REG_P (op0))
1390 machine_mode op0_mode = GET_MODE (op0);
1391 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1392 word = offset ? const0_rtx : op0;
1393 else
1394 word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1395 GET_MODE (op0));
1396 offset &= BITS_PER_WORD / unit - 1;
1398 else
1399 word = op0;
1401 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1402 it is just an out-of-bounds access. Ignore it. */
1403 if (word != const0_rtx)
1404 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1405 bitregion_start, bitregion_end, part,
1406 reverse);
1407 bitsdone += thissize;
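
/* Illustrative sketch, not part of this file: the chunking loop of
   store_split_bit_field for the little-endian case, operating on an
   array of bytes (UNIT == 8) instead of RTL words.  Each iteration
   stores at most the bits that still fit in the current unit, so no
   single access crosses a unit boundary.  The names and <stdint.h>
   are assumptions for the example.  */
#include <stdint.h>
#include <stddef.h>

static void
store_split_example (uint8_t *bytes, unsigned bitpos, unsigned bitsize,
                     uint64_t value)
{
  const unsigned unit = 8;
  unsigned bitsdone = 0;

  while (bitsdone < bitsize)
    {
      size_t offset = (bitpos + bitsdone) / unit;
      unsigned thispos = (bitpos + bitsdone) % unit;
      unsigned thissize = bitsize - bitsdone;
      if (thissize > unit - thispos)
        thissize = unit - thispos;                 /* don't cross the unit */

      uint8_t part = (value >> bitsdone) & ((1u << thissize) - 1);
      uint8_t mask = ((1u << thissize) - 1) << thispos;
      bytes[offset] = (uint8_t) ((bytes[offset] & ~mask) | (part << thispos));

      bitsdone += thissize;
    }
}
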
1411 /* A subroutine of extract_bit_field_1 that converts return value X
1412 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1413 to extract_bit_field. */
1415 static rtx
1416 convert_extracted_bit_field (rtx x, machine_mode mode,
1417 machine_mode tmode, bool unsignedp)
1419 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1420 return x;
1422 /* If the x mode is not a scalar integral, first convert to the
1423 integer mode of that size and then access it as a floating-point
1424 value via a SUBREG. */
1425 if (!SCALAR_INT_MODE_P (tmode))
1427 machine_mode smode;
1429 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1430 x = convert_to_mode (smode, x, unsignedp);
1431 x = force_reg (smode, x);
1432 return gen_lowpart (tmode, x);
1435 return convert_to_mode (tmode, x, unsignedp);
1438 /* Try to use an ext(z)v pattern to extract a field from OP0.
1439 Return the extracted value on success, otherwise return null.
1440 EXT_MODE is the mode of the extraction and the other arguments
1441 are as for extract_bit_field. */
1443 static rtx
1444 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1445 unsigned HOST_WIDE_INT bitsize,
1446 unsigned HOST_WIDE_INT bitnum,
1447 int unsignedp, rtx target,
1448 machine_mode mode, machine_mode tmode)
1450 struct expand_operand ops[4];
1451 rtx spec_target = target;
1452 rtx spec_target_subreg = 0;
1453 machine_mode ext_mode = extv->field_mode;
1454 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1456 if (bitsize == 0 || unit < bitsize)
1457 return NULL_RTX;
1459 if (MEM_P (op0))
1460 /* Get a reference to the first byte of the field. */
1461 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1462 &bitnum);
1463 else
1465 /* Convert from counting within OP0 to counting in EXT_MODE. */
1466 if (BYTES_BIG_ENDIAN)
1467 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1469 /* If op0 is a register, we need it in EXT_MODE to make it
1470 acceptable to the format of ext(z)v. */
1471 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1472 return NULL_RTX;
1473 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1474 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1477 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1478 "backwards" from the size of the unit we are extracting from.
1479 Otherwise, we count bits from the most significant on a
1480 BYTES/BITS_BIG_ENDIAN machine. */
1482 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1483 bitnum = unit - bitsize - bitnum;
1485 if (target == 0)
1486 target = spec_target = gen_reg_rtx (tmode);
1488 if (GET_MODE (target) != ext_mode)
1490 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1491 between the mode of the extraction (word_mode) and the target
1492 mode. Instead, create a temporary and use convert_move to set
1493 the target. */
1494 if (REG_P (target)
1495 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1497 target = gen_lowpart (ext_mode, target);
1498 if (GET_MODE_PRECISION (ext_mode)
1499 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1500 spec_target_subreg = target;
1502 else
1503 target = gen_reg_rtx (ext_mode);
1506 create_output_operand (&ops[0], target, ext_mode);
1507 create_fixed_operand (&ops[1], op0);
1508 create_integer_operand (&ops[2], bitsize);
1509 create_integer_operand (&ops[3], bitnum);
1510 if (maybe_expand_insn (extv->icode, 4, ops))
1512 target = ops[0].value;
1513 if (target == spec_target)
1514 return target;
1515 if (target == spec_target_subreg)
1516 return spec_target;
1517 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1519 return NULL_RTX;
1522 /* A subroutine of extract_bit_field, with the same arguments.
1523 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1524 if we can find no other means of implementing the operation.
 1525 If FALLBACK_P is false, return NULL instead. */
1527 static rtx
1528 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1529 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1530 machine_mode mode, machine_mode tmode,
1531 bool reverse, bool fallback_p)
1533 rtx op0 = str_rtx;
1534 machine_mode int_mode;
1535 machine_mode mode1;
1537 if (tmode == VOIDmode)
1538 tmode = mode;
1540 while (GET_CODE (op0) == SUBREG)
1542 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1543 op0 = SUBREG_REG (op0);
1546 /* If we have an out-of-bounds access to a register, just return an
1547 uninitialized register of the required mode. This can occur if the
1548 source code contains an out-of-bounds access to a small array. */
1549 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1550 return gen_reg_rtx (tmode);
1552 if (REG_P (op0)
1553 && mode == GET_MODE (op0)
1554 && bitnum == 0
1555 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1557 if (reverse)
1558 op0 = flip_storage_order (mode, op0);
1559 /* We're trying to extract a full register from itself. */
1560 return op0;
1563 /* See if we can get a better vector mode before extracting. */
1564 if (VECTOR_MODE_P (GET_MODE (op0))
1565 && !MEM_P (op0)
1566 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1568 machine_mode new_mode;
1570 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1571 new_mode = MIN_MODE_VECTOR_FLOAT;
1572 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1573 new_mode = MIN_MODE_VECTOR_FRACT;
1574 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1575 new_mode = MIN_MODE_VECTOR_UFRACT;
1576 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1577 new_mode = MIN_MODE_VECTOR_ACCUM;
1578 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1579 new_mode = MIN_MODE_VECTOR_UACCUM;
1580 else
1581 new_mode = MIN_MODE_VECTOR_INT;
1583 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1584 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1585 && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode)
1586 && targetm.vector_mode_supported_p (new_mode))
1587 break;
1588 if (new_mode != VOIDmode)
1589 op0 = gen_lowpart (new_mode, op0);
1592 /* Use vec_extract patterns for extracting parts of vectors whenever
1593 available. */
1594 if (VECTOR_MODE_P (GET_MODE (op0))
1595 && !MEM_P (op0)
1596 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1597 && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1598 == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1600 struct expand_operand ops[3];
1601 machine_mode outermode = GET_MODE (op0);
1602 machine_mode innermode = GET_MODE_INNER (outermode);
1603 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1604 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1606 create_output_operand (&ops[0], target, innermode);
1607 create_input_operand (&ops[1], op0, outermode);
1608 create_integer_operand (&ops[2], pos);
1609 if (maybe_expand_insn (icode, 3, ops))
1611 target = ops[0].value;
1612 if (GET_MODE (target) != mode)
1613 return gen_lowpart (tmode, target);
1614 return target;
1618 /* Make sure we are playing with integral modes. Pun with subregs
1619 if we aren't. */
1621 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1622 if (imode != GET_MODE (op0))
1624 if (MEM_P (op0))
1625 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1626 else if (imode != BLKmode)
1628 op0 = gen_lowpart (imode, op0);
1630 /* If we got a SUBREG, force it into a register since we
1631 aren't going to be able to do another SUBREG on it. */
1632 if (GET_CODE (op0) == SUBREG)
1633 op0 = force_reg (imode, op0);
1635 else
1637 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1638 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1639 emit_move_insn (mem, op0);
1640 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1645 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1646 If that's wrong, the solution is to test for it and set TARGET to 0
1647 if needed. */
1649 /* Get the mode of the field to use for atomic access or subreg
1650 conversion. */
1651 mode1 = mode;
1652 if (SCALAR_INT_MODE_P (tmode))
1654 machine_mode try_mode = mode_for_size (bitsize,
1655 GET_MODE_CLASS (tmode), 0);
1656 if (try_mode != BLKmode)
1657 mode1 = try_mode;
1659 gcc_assert (mode1 != BLKmode);
1661 /* Extraction of a full MODE1 value can be done with a subreg as long
1662 as the least significant bit of the value is the least significant
1663 bit of either OP0 or a word of OP0. */
1664 if (!MEM_P (op0)
1665 && !reverse
1666 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1667 && bitsize == GET_MODE_BITSIZE (mode1)
1668 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1670 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1671 bitnum / BITS_PER_UNIT);
1672 if (sub)
1673 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1676 /* Extraction of a full MODE1 value can be done with a load as long as
1677 the field is on a byte boundary and is sufficiently aligned. */
1678 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1680 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1681 if (reverse)
1682 op0 = flip_storage_order (mode1, op0);
1683 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1686 /* Handle fields bigger than a word. */
1688 if (bitsize > BITS_PER_WORD)
1690 /* Here we transfer the words of the field
1691 in the order least significant first.
1692 This is because the most significant word is the one which may
1693 be less than full. */
1695 const bool backwards = WORDS_BIG_ENDIAN;
1696 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1697 unsigned int i;
1698 rtx_insn *last;
1700 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1701 target = gen_reg_rtx (mode);
1703 /* In case we're about to clobber a base register or something
1704 (see gcc.c-torture/execute/20040625-1.c). */
1705 if (reg_mentioned_p (target, str_rtx))
1706 target = gen_reg_rtx (mode);
1708 /* Indicate for flow that the entire target reg is being set. */
1709 emit_clobber (target);
1711 last = get_last_insn ();
1712 for (i = 0; i < nwords; i++)
1714 /* If I is 0, use the low-order word in both field and target;
1715 if I is 1, use the next to lowest word; and so on. */
1716 /* Word number in TARGET to use. */
1717 unsigned int wordnum
1718 = (backwards
1719 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1720 : i);
1721 /* Offset from start of field in OP0. */
1722 unsigned int bit_offset = (backwards ^ reverse
1723 ? MAX ((int) bitsize - ((int) i + 1)
1724 * BITS_PER_WORD,
1726 : (int) i * BITS_PER_WORD);
1727 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1728 rtx result_part
1729 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1730 bitsize - i * BITS_PER_WORD),
1731 bitnum + bit_offset, 1, target_part,
1732 mode, word_mode, reverse, fallback_p);
1734 gcc_assert (target_part);
1735 if (!result_part)
1737 delete_insns_since (last);
1738 return NULL;
1741 if (result_part != target_part)
1742 emit_move_insn (target_part, result_part);
1745 if (unsignedp)
1747 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1748 need to be zeroed out. */
1749 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1751 unsigned int i, total_words;
1753 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1754 for (i = nwords; i < total_words; i++)
1755 emit_move_insn
1756 (operand_subword (target,
1757 backwards ? total_words - i - 1 : i,
1758 1, VOIDmode),
1759 const0_rtx);
1761 return target;
1764 /* Signed bit field: sign-extend with two arithmetic shifts. */
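/* For instance, a 40-bit signed field widened into a 64-bit mode is
   shifted left by 64 - 40 = 24 so that its msb reaches the mode's msb,
   then arithmetic-shifted right by 24, replicating the sign bit across
   the upper 24 bits of the result.  */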
1765 target = expand_shift (LSHIFT_EXPR, mode, target,
1766 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1767 return expand_shift (RSHIFT_EXPR, mode, target,
1768 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1771 /* If OP0 is a multi-word register, narrow it to the affected word.
1772 If the region spans two words, defer to extract_split_bit_field. */
1773 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1775 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1777 if (!fallback_p)
1778 return NULL_RTX;
1779 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1780 reverse);
1781 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1783 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1784 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1785 bitnum %= BITS_PER_WORD;
1788 /* From here on we know the desired field is smaller than a word.
1789 If OP0 is a register, it too fits within a word. */
1790 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1791 extraction_insn extv;
1792 if (!MEM_P (op0)
1793 && !reverse
1794 /* ??? We could limit the structure size to the part of OP0 that
1795 contains the field, with appropriate checks for endianness
1796 and TRULY_NOOP_TRUNCATION. */
1797 && get_best_reg_extraction_insn (&extv, pattern,
1798 GET_MODE_BITSIZE (GET_MODE (op0)),
1799 tmode))
1801 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1802 unsignedp, target, mode,
1803 tmode);
1804 if (result)
1805 return result;
1808 /* If OP0 is a memory, try copying it to a register and seeing if a
1809 cheap register alternative is available. */
1810 if (MEM_P (op0) && !reverse)
1812 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1813 tmode))
1815 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1816 bitnum, unsignedp,
1817 target, mode,
1818 tmode);
1819 if (result)
1820 return result;
1823 rtx_insn *last = get_last_insn ();
1825 /* Try loading part of OP0 into a register and extracting the
1826 bitfield from that. */
1827 unsigned HOST_WIDE_INT bitpos;
1828 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1829 0, 0, tmode, &bitpos);
1830 if (xop0)
1832 xop0 = copy_to_reg (xop0);
1833 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1834 unsignedp, target,
1835 mode, tmode, reverse, false);
1836 if (result)
1837 return result;
1838 delete_insns_since (last);
1842 if (!fallback_p)
1843 return NULL;
1845 /* Find a correspondingly-sized integer field, so we can apply
1846 shifts and masks to it. */
1847 int_mode = int_mode_for_mode (tmode);
1848 if (int_mode == BLKmode)
1849 int_mode = int_mode_for_mode (mode);
1850 /* Should probably push op0 out to memory and then do a load. */
1851 gcc_assert (int_mode != BLKmode);
1853 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target,
1854 unsignedp, reverse);
1856 /* Complex values must be reversed piecewise, so we need to undo the global
1857 reversal, convert to the complex mode and reverse again. */
1858 if (reverse && COMPLEX_MODE_P (tmode))
1860 target = flip_storage_order (int_mode, target);
1861 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1862 target = flip_storage_order (tmode, target);
1864 else
1865 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1867 return target;
1870 /* Generate code to extract a bit-field from STR_RTX
1871 containing BITSIZE bits, starting at BITNUM,
1872 and put it in TARGET if possible (if TARGET is nonzero).
1873 Regardless of TARGET, we return the rtx for where the value is placed.
1875 STR_RTX is the structure containing the byte (a REG or MEM).
1876 UNSIGNEDP is nonzero if this is an unsigned bit field.
1877 MODE is the natural mode of the field value once extracted.
1878 TMODE is the mode the caller would like the value to have;
1879 but the value may be returned with type MODE instead.
1881 If REVERSE is true, the extraction is to be done in reverse order.
1883 If a TARGET is specified and we can store in it at no extra cost,
1884 we do so, and return TARGET.
1885 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1886 if they are equally easy. */
1889 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1890 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1891 machine_mode mode, machine_mode tmode, bool reverse)
1893 machine_mode mode1;
1895 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1896 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1897 mode1 = GET_MODE (str_rtx);
1898 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1899 mode1 = GET_MODE (target);
1900 else
1901 mode1 = tmode;
1903 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1905 /* Extraction of a full MODE1 value can be done with a simple load.
1906 We know here that the field can be accessed with one single
1907 instruction. For targets that support unaligned memory,
1908 an unaligned access may be necessary. */
1909 if (bitsize == GET_MODE_BITSIZE (mode1))
1911 rtx result = adjust_bitfield_address (str_rtx, mode1,
1912 bitnum / BITS_PER_UNIT);
1913 if (reverse)
1914 result = flip_storage_order (mode1, result);
1915 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1916 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1919 str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1920 &bitnum);
1921 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1922 str_rtx = copy_to_reg (str_rtx);
1925 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1926 target, mode, tmode, reverse, true);
1929 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1930 from bit BITNUM of OP0.
1932 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1933 If REVERSE is true, the extraction is to be done in reverse order.
1935 If TARGET is nonzero, attempts to store the value there
1936 and return TARGET, but this is not guaranteed.
1937 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
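/* A minimal C sketch of what the unsigned, little-endian case below
   computes, assuming a 32-bit unsigned int; the function and variable
   names are illustrative only and not part of this file:

     unsigned int
     extract_unsigned (unsigned int word, int bitnum, int bitsize)
     {
       unsigned int mask = bitsize >= 32 ? ~0u : (1u << bitsize) - 1u;
       return (word >> bitnum) & mask;
     }

   The code below performs the same computation on RTL, additionally
   handling big-endian bit numbering, storage-order reversal and
   signed fields.  */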
1939 static rtx
1940 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1941 unsigned HOST_WIDE_INT bitsize,
1942 unsigned HOST_WIDE_INT bitnum, rtx target,
1943 int unsignedp, bool reverse)
1945 if (MEM_P (op0))
1947 machine_mode mode
1948 = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1949 MEM_VOLATILE_P (op0));
1951 if (mode == VOIDmode)
1952 /* The only way this should occur is if the field spans word
1953 boundaries. */
1954 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1955 reverse);
1957 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1960 return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1961 target, unsignedp, reverse);
1964 /* Helper function for extract_fixed_bit_field, extracts
1965 the bit field always using the MODE of OP0. */
1967 static rtx
1968 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1969 unsigned HOST_WIDE_INT bitsize,
1970 unsigned HOST_WIDE_INT bitnum, rtx target,
1971 int unsignedp, bool reverse)
1973 machine_mode mode = GET_MODE (op0);
1974 gcc_assert (SCALAR_INT_MODE_P (mode));
1976 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1977 for invalid input, such as extract equivalent of f5 from
1978 gcc.dg/pr48335-2.c. */
1980 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1981 /* BITNUM is the distance between our msb and that of OP0.
1982 Convert it to the distance from the lsb. */
1983 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1985 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1986 We have reduced the big-endian case to the little-endian case. */
1987 if (reverse)
1988 op0 = flip_storage_order (mode, op0);
1990 if (unsignedp)
1992 if (bitnum)
1994 /* If the field does not already start at the lsb,
1995 shift it so it does. */
1996 /* Maybe propagate the target for the shift. */
1997 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1998 if (tmode != mode)
1999 subtarget = 0;
2000 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2002 /* Convert the value to the desired mode. */
2003 if (mode != tmode)
2004 op0 = convert_to_mode (tmode, op0, 1);
2006 /* Unless the msb of the field used to be the msb when we shifted,
2007 mask out the upper bits. */
2009 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2010 return expand_binop (GET_MODE (op0), and_optab, op0,
2011 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
2012 target, 1, OPTAB_LIB_WIDEN);
2013 return op0;
2016 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2017 then arithmetic-shift its lsb to the lsb of the word. */
2018 op0 = force_reg (mode, op0);
2020 /* Find the narrowest integer mode that contains the field. */
2022 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
2023 mode = GET_MODE_WIDER_MODE (mode))
2024 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
2026 op0 = convert_to_mode (mode, op0, 0);
2027 break;
2030 if (mode != tmode)
2031 target = 0;
2033 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2035 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2036 /* Maybe propagate the target for the shift. */
2037 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2038 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2041 return expand_shift (RSHIFT_EXPR, mode, op0,
2042 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2045 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2046 VALUE << BITPOS. */
2048 static rtx
2049 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2050 int bitpos)
2052 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2055 /* Extract a bit field that is split across two words
2056 and return an RTX for the result.
2058 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2059 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2060 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2062 If REVERSE is true, the extraction is to be done in reverse order. */
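/* Illustrative case, assuming 32-bit words, a little-endian target and
   no storage-order reversal: for an 8-bit field starting at bit 28, the
   loop below extracts the low 4 bits of the field from the first word,
   the remaining 4 bits from the next word, and ORs the second part,
   shifted left by 4, into the first.  */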
2064 static rtx
2065 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2066 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2067 bool reverse)
2069 unsigned int unit;
2070 unsigned int bitsdone = 0;
2071 rtx result = NULL_RTX;
2072 int first = 1;
2074 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2075 much at a time. */
2076 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2077 unit = BITS_PER_WORD;
2078 else
2079 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2081 while (bitsdone < bitsize)
2083 unsigned HOST_WIDE_INT thissize;
2084 rtx part, word;
2085 unsigned HOST_WIDE_INT thispos;
2086 unsigned HOST_WIDE_INT offset;
2088 offset = (bitpos + bitsdone) / unit;
2089 thispos = (bitpos + bitsdone) % unit;
2091 /* THISSIZE must not overrun a word boundary. Otherwise,
2092 extract_fixed_bit_field will call us again, and we will mutually
2093 recurse forever. */
2094 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2095 thissize = MIN (thissize, unit - thispos);
2097 /* If OP0 is a register, then handle OFFSET here. */
2098 if (SUBREG_P (op0) || REG_P (op0))
2100 word = operand_subword_force (op0, offset, GET_MODE (op0));
2101 offset = 0;
2103 else
2104 word = op0;
2106 /* Extract the parts in bit-counting order,
2107 whose meaning is determined by BYTES_PER_UNIT.
2108 OFFSET is in UNITs, and UNIT is in bits. */
2109 part = extract_fixed_bit_field (word_mode, word, thissize,
2110 offset * unit + thispos, 0, 1, reverse);
2111 bitsdone += thissize;
2113 /* Shift this part into place for the result. */
2114 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2116 if (bitsize != bitsdone)
2117 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2118 bitsize - bitsdone, 0, 1);
2120 else
2122 if (bitsdone != thissize)
2123 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2124 bitsdone - thissize, 0, 1);
2127 if (first)
2128 result = part;
2129 else
2130 /* Combine the parts with bitwise or. This works
2131 because we extracted each part as an unsigned bit field. */
2132 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2133 OPTAB_LIB_WIDEN);
2135 first = 0;
2138 /* Unsigned bit field: we are done. */
2139 if (unsignedp)
2140 return result;
2141 /* Signed bit field: sign-extend with two arithmetic shifts. */
2142 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2143 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2144 return expand_shift (RSHIFT_EXPR, word_mode, result,
2145 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2148 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2149 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2150 MODE, fill the upper bits with zeros. Fail if the layout of either
2151 mode is unknown (as for CC modes) or if the extraction would involve
2152 unprofitable mode punning. Return the value on success, otherwise
2153 return null.
2155 This is different from gen_lowpart* in these respects:
2157 - the returned value must always be considered an rvalue
2159 - when MODE is wider than SRC_MODE, the extraction involves
2160 a zero extension
2162 - when MODE is smaller than SRC_MODE, the extraction involves
2163 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2165 In other words, this routine performs a computation, whereas the
2166 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2167 operations. */
2170 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2172 machine_mode int_mode, src_int_mode;
2174 if (mode == src_mode)
2175 return src;
2177 if (CONSTANT_P (src))
2179 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2180 fails, it will happily create (subreg (symbol_ref)) or similar
2181 invalid SUBREGs. */
2182 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2183 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2184 if (ret)
2185 return ret;
2187 if (GET_MODE (src) == VOIDmode
2188 || !validate_subreg (mode, src_mode, src, byte))
2189 return NULL_RTX;
2191 src = force_reg (GET_MODE (src), src);
2192 return gen_rtx_SUBREG (mode, src, byte);
2195 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2196 return NULL_RTX;
2198 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2199 && MODES_TIEABLE_P (mode, src_mode))
2201 rtx x = gen_lowpart_common (mode, src);
2202 if (x)
2203 return x;
2206 src_int_mode = int_mode_for_mode (src_mode);
2207 int_mode = int_mode_for_mode (mode);
2208 if (src_int_mode == BLKmode || int_mode == BLKmode)
2209 return NULL_RTX;
2211 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2212 return NULL_RTX;
2213 if (!MODES_TIEABLE_P (int_mode, mode))
2214 return NULL_RTX;
2216 src = gen_lowpart (src_int_mode, src);
2217 src = convert_modes (int_mode, src_int_mode, src, true);
2218 src = gen_lowpart (mode, src);
2219 return src;
2222 /* Add INC into TARGET. */
2224 void
2225 expand_inc (rtx target, rtx inc)
2227 rtx value = expand_binop (GET_MODE (target), add_optab,
2228 target, inc,
2229 target, 0, OPTAB_LIB_WIDEN);
2230 if (value != target)
2231 emit_move_insn (target, value);
2234 /* Subtract DEC from TARGET. */
2236 void
2237 expand_dec (rtx target, rtx dec)
2239 rtx value = expand_binop (GET_MODE (target), sub_optab,
2240 target, dec,
2241 target, 0, OPTAB_LIB_WIDEN);
2242 if (value != target)
2243 emit_move_insn (target, value);
2246 /* Output a shift instruction for expression code CODE,
2247 with SHIFTED being the rtx for the value to shift,
2248 and AMOUNT the rtx for the amount to shift by.
2249 Store the result in the rtx TARGET, if that is convenient.
2250 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2251 Return the rtx for where the value is.
2252 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2253 in which case 0 is returned. */
2255 static rtx
2256 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2257 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2259 rtx op1, temp = 0;
2260 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2261 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2262 optab lshift_optab = ashl_optab;
2263 optab rshift_arith_optab = ashr_optab;
2264 optab rshift_uns_optab = lshr_optab;
2265 optab lrotate_optab = rotl_optab;
2266 optab rrotate_optab = rotr_optab;
2267 machine_mode op1_mode;
2268 machine_mode scalar_mode = mode;
2269 int attempt;
2270 bool speed = optimize_insn_for_speed_p ();
2272 if (VECTOR_MODE_P (mode))
2273 scalar_mode = GET_MODE_INNER (mode);
2274 op1 = amount;
2275 op1_mode = GET_MODE (op1);
2277 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2278 shift amount is a vector, use the vector/vector shift patterns. */
2279 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2281 lshift_optab = vashl_optab;
2282 rshift_arith_optab = vashr_optab;
2283 rshift_uns_optab = vlshr_optab;
2284 lrotate_optab = vrotl_optab;
2285 rrotate_optab = vrotr_optab;
2288 /* Shift counts computed by NEGATE_EXPR were previously detected here
2289 and shifted in the other direction; but that does not work
2290 on all machines. */
2292 if (SHIFT_COUNT_TRUNCATED)
2294 if (CONST_INT_P (op1)
2295 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2296 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2297 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2298 % GET_MODE_BITSIZE (scalar_mode));
2299 else if (GET_CODE (op1) == SUBREG
2300 && subreg_lowpart_p (op1)
2301 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2302 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2303 op1 = SUBREG_REG (op1);
2306 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2307 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2308 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2309 amount instead. */
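/* Worked example of the canonicalization above for a 32-bit scalar
   mode: a rotate left by 24 is rewritten as a rotate right by
   32 - 24 = 8, keeping the constant amount in the 1 .. bitsize / 2
   range.  */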
2310 if (rotate
2311 && CONST_INT_P (op1)
2312 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2313 GET_MODE_BITSIZE (scalar_mode) - 1))
2315 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2316 left = !left;
2317 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2320 /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2321 Note that this is not the case for bigger values. For instance a rotation
2322 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2323 0x04030201 (bswapsi). */
2324 if (rotate
2325 && CONST_INT_P (op1)
2326 && INTVAL (op1) == BITS_PER_UNIT
2327 && GET_MODE_SIZE (scalar_mode) == 2
2328 && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2329 return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2330 unsignedp);
2332 if (op1 == const0_rtx)
2333 return shifted;
2335 /* Check whether it's cheaper to implement a left shift by a constant
2336 bit count as a sequence of additions. */
2337 if (code == LSHIFT_EXPR
2338 && CONST_INT_P (op1)
2339 && INTVAL (op1) > 0
2340 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2341 && INTVAL (op1) < MAX_BITS_PER_WORD
2342 && (shift_cost (speed, mode, INTVAL (op1))
2343 > INTVAL (op1) * add_cost (speed, mode))
2344 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2346 int i;
2347 for (i = 0; i < INTVAL (op1); i++)
2349 temp = force_reg (mode, shifted);
2350 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2351 unsignedp, OPTAB_LIB_WIDEN);
2353 return shifted;
2356 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2358 enum optab_methods methods;
2360 if (attempt == 0)
2361 methods = OPTAB_DIRECT;
2362 else if (attempt == 1)
2363 methods = OPTAB_WIDEN;
2364 else
2365 methods = OPTAB_LIB_WIDEN;
2367 if (rotate)
2369 /* Widening does not work for rotation. */
2370 if (methods == OPTAB_WIDEN)
2371 continue;
2372 else if (methods == OPTAB_LIB_WIDEN)
2374 /* If we have been unable to open-code this by a rotation,
2375 do it as the IOR of two shifts. I.e., to rotate A
2376 by N bits, compute
2377 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2378 where C is the bitsize of A.
2380 It is theoretically possible that the target machine might
2381 not be able to perform either shift and hence we would
2382 be making two libcalls rather than just the one for the
2383 shift (similarly if IOR could not be done). We will allow
2384 this extremely unlikely lossage to avoid complicating the
2385 code below. */
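/* Worked instance of the identity above for C = 32:
     (A << N) | ((unsigned) A >> ((-N) & 31)).
   The (-N) & (C - 1) form matters for variable amounts: a run-time
   N of 0 would otherwise call for a right shift by C, which is out
   of range; with the mask the right-shift amount becomes 0, and
   A << 0 already yields the correct result.  */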
2387 rtx subtarget = target == shifted ? 0 : target;
2388 rtx new_amount, other_amount;
2389 rtx temp1;
2391 new_amount = op1;
2392 if (op1 == const0_rtx)
2393 return shifted;
2394 else if (CONST_INT_P (op1))
2395 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2396 - INTVAL (op1));
2397 else
2399 other_amount
2400 = simplify_gen_unary (NEG, GET_MODE (op1),
2401 op1, GET_MODE (op1));
2402 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2403 other_amount
2404 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2405 gen_int_mode (mask, GET_MODE (op1)));
2408 shifted = force_reg (mode, shifted);
2410 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2411 mode, shifted, new_amount, 0, 1);
2412 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2413 mode, shifted, other_amount,
2414 subtarget, 1);
2415 return expand_binop (mode, ior_optab, temp, temp1, target,
2416 unsignedp, methods);
2419 temp = expand_binop (mode,
2420 left ? lrotate_optab : rrotate_optab,
2421 shifted, op1, target, unsignedp, methods);
2423 else if (unsignedp)
2424 temp = expand_binop (mode,
2425 left ? lshift_optab : rshift_uns_optab,
2426 shifted, op1, target, unsignedp, methods);
2428 /* Do arithmetic shifts.
2429 Also, if we are going to widen the operand, we can just as well
2430 use an arithmetic right-shift instead of a logical one. */
2431 if (temp == 0 && ! rotate
2432 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2434 enum optab_methods methods1 = methods;
2436 /* If trying to widen a log shift to an arithmetic shift,
2437 don't accept an arithmetic shift of the same size. */
2438 if (unsignedp)
2439 methods1 = OPTAB_MUST_WIDEN;
2441 /* Arithmetic shift */
2443 temp = expand_binop (mode,
2444 left ? lshift_optab : rshift_arith_optab,
2445 shifted, op1, target, unsignedp, methods1);
2448 /* We used to try extzv here for logical right shifts, but that was
2449 only useful for one machine, the VAX, and caused poor code
2450 generation there for lshrdi3, so the code was deleted and a
2451 define_expand for lshrsi3 was added to vax.md. */
2454 gcc_assert (temp != NULL_RTX || may_fail);
2455 return temp;
2458 /* Output a shift instruction for expression code CODE,
2459 with SHIFTED being the rtx for the value to shift,
2460 and AMOUNT the amount to shift by.
2461 Store the result in the rtx TARGET, if that is convenient.
2462 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2463 Return the rtx for where the value is. */
2466 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2467 int amount, rtx target, int unsignedp)
2469 return expand_shift_1 (code, mode,
2470 shifted, GEN_INT (amount), target, unsignedp);
2473 /* Likewise, but return 0 if that cannot be done. */
2475 static rtx
2476 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2477 int amount, rtx target, int unsignedp)
2479 return expand_shift_1 (code, mode,
2480 shifted, GEN_INT (amount), target, unsignedp, true);
2483 /* Output a shift instruction for expression code CODE,
2484 with SHIFTED being the rtx for the value to shift,
2485 and AMOUNT the tree for the amount to shift by.
2486 Store the result in the rtx TARGET, if that is convenient.
2487 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2488 Return the rtx for where the value is. */
2491 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2492 tree amount, rtx target, int unsignedp)
2494 return expand_shift_1 (code, mode,
2495 shifted, expand_normal (amount), target, unsignedp);
2499 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2500 const struct mult_cost *, machine_mode mode);
2501 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2502 const struct algorithm *, enum mult_variant);
2503 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2504 static rtx extract_high_half (machine_mode, rtx);
2505 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2506 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2507 int, int);
2508 /* Compute and return the best algorithm for multiplying by T.
2509 The algorithm must cost less than cost_limit.
2510 If retval.cost >= COST_LIMIT, no algorithm was found and all
2511 other fields of the returned struct are undefined.
2512 MODE is the machine mode of the multiplication. */
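/* Illustrative trace: for T = 10 the search below can strip the low
   zero bit (alg_shift, m = 1) and recurse on 5, which is then reached
   as 4 + 1 via a shift-and-add (alg_add_t2_m, m = 2), so the whole
   multiplication becomes x*10 = ((x << 2) + x) << 1, two cheap
   operations instead of a multiply.  */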
2514 static void
2515 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2516 const struct mult_cost *cost_limit, machine_mode mode)
2518 int m;
2519 struct algorithm *alg_in, *best_alg;
2520 struct mult_cost best_cost;
2521 struct mult_cost new_limit;
2522 int op_cost, op_latency;
2523 unsigned HOST_WIDE_INT orig_t = t;
2524 unsigned HOST_WIDE_INT q;
2525 int maxm, hash_index;
2526 bool cache_hit = false;
2527 enum alg_code cache_alg = alg_zero;
2528 bool speed = optimize_insn_for_speed_p ();
2529 machine_mode imode;
2530 struct alg_hash_entry *entry_ptr;
2532 /* Indicate that no algorithm is yet found. If no algorithm
2533 is found, this value will be returned and indicate failure. */
2534 alg_out->cost.cost = cost_limit->cost + 1;
2535 alg_out->cost.latency = cost_limit->latency + 1;
2537 if (cost_limit->cost < 0
2538 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2539 return;
2541 /* Be prepared for vector modes. */
2542 imode = GET_MODE_INNER (mode);
2544 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2546 /* Restrict the bits of "t" to the multiplication's mode. */
2547 t &= GET_MODE_MASK (imode);
2549 /* t == 1 can be done in zero cost. */
2550 if (t == 1)
2552 alg_out->ops = 1;
2553 alg_out->cost.cost = 0;
2554 alg_out->cost.latency = 0;
2555 alg_out->op[0] = alg_m;
2556 return;
2559 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2560 fail now. */
2561 if (t == 0)
2563 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2564 return;
2565 else
2567 alg_out->ops = 1;
2568 alg_out->cost.cost = zero_cost (speed);
2569 alg_out->cost.latency = zero_cost (speed);
2570 alg_out->op[0] = alg_zero;
2571 return;
2575 /* We'll be needing a couple of extra algorithm structures now. */
2577 alg_in = XALLOCA (struct algorithm);
2578 best_alg = XALLOCA (struct algorithm);
2579 best_cost = *cost_limit;
2581 /* Compute the hash index. */
2582 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2584 /* See if we already know what to do for T. */
2585 entry_ptr = alg_hash_entry_ptr (hash_index);
2586 if (entry_ptr->t == t
2587 && entry_ptr->mode == mode
2588 && entry_ptr->speed == speed
2589 && entry_ptr->alg != alg_unknown)
2591 cache_alg = entry_ptr->alg;
2593 if (cache_alg == alg_impossible)
2595 /* The cache tells us that it's impossible to synthesize
2596 multiplication by T within entry_ptr->cost. */
2597 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2598 /* COST_LIMIT is at least as restrictive as the one
2599 recorded in the hash table, in which case we have no
2600 hope of synthesizing a multiplication. Just
2601 return. */
2602 return;
2604 /* If we get here, COST_LIMIT is less restrictive than the
2605 one recorded in the hash table, so we may be able to
2606 synthesize a multiplication. Proceed as if we didn't
2607 have the cache entry. */
2609 else
2611 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2612 /* The cached algorithm shows that this multiplication
2613 requires more cost than COST_LIMIT. Just return. This
2614 way, we don't clobber this cache entry with
2615 alg_impossible but retain useful information. */
2616 return;
2618 cache_hit = true;
2620 switch (cache_alg)
2622 case alg_shift:
2623 goto do_alg_shift;
2625 case alg_add_t_m2:
2626 case alg_sub_t_m2:
2627 goto do_alg_addsub_t_m2;
2629 case alg_add_factor:
2630 case alg_sub_factor:
2631 goto do_alg_addsub_factor;
2633 case alg_add_t2_m:
2634 goto do_alg_add_t2_m;
2636 case alg_sub_t2_m:
2637 goto do_alg_sub_t2_m;
2639 default:
2640 gcc_unreachable ();
2645 /* If we have a group of zero bits at the low-order part of T, try
2646 multiplying by the remaining bits and then doing a shift. */
2648 if ((t & 1) == 0)
2650 do_alg_shift:
2651 m = ctz_or_zero (t); /* m = number of low zero bits */
2652 if (m < maxm)
2654 q = t >> m;
2655 /* The function expand_shift will choose between a shift and
2656 a sequence of additions, so the observed cost is given as
2657 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2658 op_cost = m * add_cost (speed, mode);
2659 if (shift_cost (speed, mode, m) < op_cost)
2660 op_cost = shift_cost (speed, mode, m);
2661 new_limit.cost = best_cost.cost - op_cost;
2662 new_limit.latency = best_cost.latency - op_cost;
2663 synth_mult (alg_in, q, &new_limit, mode);
2665 alg_in->cost.cost += op_cost;
2666 alg_in->cost.latency += op_cost;
2667 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2669 best_cost = alg_in->cost;
2670 std::swap (alg_in, best_alg);
2671 best_alg->log[best_alg->ops] = m;
2672 best_alg->op[best_alg->ops] = alg_shift;
2675 /* See if treating ORIG_T as a signed number yields a better
2676 sequence. Try this sequence only for a negative ORIG_T
2677 as it would be useless for a non-negative ORIG_T. */
2678 if ((HOST_WIDE_INT) orig_t < 0)
2680 /* Shift ORIG_T as follows because a right shift of a
2681 negative-valued signed type is implementation
2682 defined. */
2683 q = ~(~orig_t >> m);
2684 /* The function expand_shift will choose between a shift
2685 and a sequence of additions, so the observed cost is
2686 given as MIN (m * add_cost(speed, mode),
2687 shift_cost(speed, mode, m)). */
2688 op_cost = m * add_cost (speed, mode);
2689 if (shift_cost (speed, mode, m) < op_cost)
2690 op_cost = shift_cost (speed, mode, m);
2691 new_limit.cost = best_cost.cost - op_cost;
2692 new_limit.latency = best_cost.latency - op_cost;
2693 synth_mult (alg_in, q, &new_limit, mode);
2695 alg_in->cost.cost += op_cost;
2696 alg_in->cost.latency += op_cost;
2697 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2699 best_cost = alg_in->cost;
2700 std::swap (alg_in, best_alg);
2701 best_alg->log[best_alg->ops] = m;
2702 best_alg->op[best_alg->ops] = alg_shift;
2706 if (cache_hit)
2707 goto done;
2710 /* If we have an odd number, add or subtract one. */
2711 if ((t & 1) != 0)
2713 unsigned HOST_WIDE_INT w;
2715 do_alg_addsub_t_m2:
2716 for (w = 1; (w & t) != 0; w <<= 1)
2718 /* If T was -1, then W will be zero after the loop. This is another
2719 case where T ends with ...111. Handling this with (T + 1) and
2720 subtracting 1 produces slightly better code and results in algorithm
2721 selection much faster than treating it like the ...0111 case
2722 below. */
2723 if (w == 0
2724 || (w > 2
2725 /* Reject the case where t is 3.
2726 Thus we prefer addition in that case. */
2727 && t != 3))
2729 /* T ends with ...111. Multiply by (T + 1) and subtract T. */
2731 op_cost = add_cost (speed, mode);
2732 new_limit.cost = best_cost.cost - op_cost;
2733 new_limit.latency = best_cost.latency - op_cost;
2734 synth_mult (alg_in, t + 1, &new_limit, mode);
2736 alg_in->cost.cost += op_cost;
2737 alg_in->cost.latency += op_cost;
2738 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2740 best_cost = alg_in->cost;
2741 std::swap (alg_in, best_alg);
2742 best_alg->log[best_alg->ops] = 0;
2743 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2746 else
2748 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */
2750 op_cost = add_cost (speed, mode);
2751 new_limit.cost = best_cost.cost - op_cost;
2752 new_limit.latency = best_cost.latency - op_cost;
2753 synth_mult (alg_in, t - 1, &new_limit, mode);
2755 alg_in->cost.cost += op_cost;
2756 alg_in->cost.latency += op_cost;
2757 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2759 best_cost = alg_in->cost;
2760 std::swap (alg_in, best_alg);
2761 best_alg->log[best_alg->ops] = 0;
2762 best_alg->op[best_alg->ops] = alg_add_t_m2;
2766 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2767 quickly with a - a * n for some appropriate constant n. */
2768 m = exact_log2 (-orig_t + 1);
2769 if (m >= 0 && m < maxm)
2771 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2772 /* If the target has a cheap shift-and-subtract insn use
2773 that in preference to a shift insn followed by a sub insn.
2774 Assume that the shift-and-sub is "atomic" with a latency
2775 equal to its cost, otherwise assume that on superscalar
2776 hardware the shift may be executed concurrently with the
2777 earlier steps in the algorithm. */
2778 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2780 op_cost = shiftsub1_cost (speed, mode, m);
2781 op_latency = op_cost;
2783 else
2784 op_latency = add_cost (speed, mode);
2786 new_limit.cost = best_cost.cost - op_cost;
2787 new_limit.latency = best_cost.latency - op_latency;
2788 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2789 &new_limit, mode);
2791 alg_in->cost.cost += op_cost;
2792 alg_in->cost.latency += op_latency;
2793 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2795 best_cost = alg_in->cost;
2796 std::swap (alg_in, best_alg);
2797 best_alg->log[best_alg->ops] = m;
2798 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2802 if (cache_hit)
2803 goto done;
2806 /* Look for factors of t of the form
2807 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2808 If we find such a factor, we can multiply by t using an algorithm that
2809 multiplies by q, shift the result by m and add/subtract it to itself.
2811 We search for large factors first and loop down, even if large factors
2812 are less probable than small; if we find a large factor we will find a
2813 good sequence quickly, and therefore be able to prune (by decreasing
2814 COST_LIMIT) the search. */
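/* Worked example: t = 45 is divisible by d = 2**4 - 1 = 15, so with
   m = 4 the search recurses on q = 45 / 15 = 3 (itself (x << 1) + x)
   and finishes with one shift-and-subtract:
   x*45 = (x*3 << 4) - x*3.  */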
2816 do_alg_addsub_factor:
2817 for (m = floor_log2 (t - 1); m >= 2; m--)
2819 unsigned HOST_WIDE_INT d;
2821 d = (HOST_WIDE_INT_1U << m) + 1;
2822 if (t % d == 0 && t > d && m < maxm
2823 && (!cache_hit || cache_alg == alg_add_factor))
2825 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2826 if (shiftadd_cost (speed, mode, m) <= op_cost)
2827 op_cost = shiftadd_cost (speed, mode, m);
2829 op_latency = op_cost;
2832 new_limit.cost = best_cost.cost - op_cost;
2833 new_limit.latency = best_cost.latency - op_latency;
2834 synth_mult (alg_in, t / d, &new_limit, mode);
2836 alg_in->cost.cost += op_cost;
2837 alg_in->cost.latency += op_latency;
2838 if (alg_in->cost.latency < op_cost)
2839 alg_in->cost.latency = op_cost;
2840 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2842 best_cost = alg_in->cost;
2843 std::swap (alg_in, best_alg);
2844 best_alg->log[best_alg->ops] = m;
2845 best_alg->op[best_alg->ops] = alg_add_factor;
2847 /* Other factors will have been taken care of in the recursion. */
2848 break;
2851 d = (HOST_WIDE_INT_1U << m) - 1;
2852 if (t % d == 0 && t > d && m < maxm
2853 && (!cache_hit || cache_alg == alg_sub_factor))
2855 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2856 if (shiftsub0_cost (speed, mode, m) <= op_cost)
2857 op_cost = shiftsub0_cost (speed, mode, m);
2859 op_latency = op_cost;
2861 new_limit.cost = best_cost.cost - op_cost;
2862 new_limit.latency = best_cost.latency - op_latency;
2863 synth_mult (alg_in, t / d, &new_limit, mode);
2865 alg_in->cost.cost += op_cost;
2866 alg_in->cost.latency += op_latency;
2867 if (alg_in->cost.latency < op_cost)
2868 alg_in->cost.latency = op_cost;
2869 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2871 best_cost = alg_in->cost;
2872 std::swap (alg_in, best_alg);
2873 best_alg->log[best_alg->ops] = m;
2874 best_alg->op[best_alg->ops] = alg_sub_factor;
2876 break;
2879 if (cache_hit)
2880 goto done;
2882 /* Try shift-and-add (load effective address) instructions,
2883 i.e. do a*3, a*5, a*9. */
2884 if ((t & 1) != 0)
2886 do_alg_add_t2_m:
2887 q = t - 1;
2888 m = ctz_hwi (q);
2889 if (q && m < maxm)
2891 op_cost = shiftadd_cost (speed, mode, m);
2892 new_limit.cost = best_cost.cost - op_cost;
2893 new_limit.latency = best_cost.latency - op_cost;
2894 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2896 alg_in->cost.cost += op_cost;
2897 alg_in->cost.latency += op_cost;
2898 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2900 best_cost = alg_in->cost;
2901 std::swap (alg_in, best_alg);
2902 best_alg->log[best_alg->ops] = m;
2903 best_alg->op[best_alg->ops] = alg_add_t2_m;
2906 if (cache_hit)
2907 goto done;
2909 do_alg_sub_t2_m:
2910 q = t + 1;
2911 m = ctz_hwi (q);
2912 if (q && m < maxm)
2914 op_cost = shiftsub0_cost (speed, mode, m);
2915 new_limit.cost = best_cost.cost - op_cost;
2916 new_limit.latency = best_cost.latency - op_cost;
2917 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2919 alg_in->cost.cost += op_cost;
2920 alg_in->cost.latency += op_cost;
2921 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2923 best_cost = alg_in->cost;
2924 std::swap (alg_in, best_alg);
2925 best_alg->log[best_alg->ops] = m;
2926 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2929 if (cache_hit)
2930 goto done;
2933 done:
2934 /* If best_cost has not decreased, we have not found any algorithm. */
2935 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2937 /* We failed to find an algorithm. Record alg_impossible for
2938 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2939 we are asked to find an algorithm for T within the same or
2940 lower COST_LIMIT, we can immediately return to the
2941 caller. */
2942 entry_ptr->t = t;
2943 entry_ptr->mode = mode;
2944 entry_ptr->speed = speed;
2945 entry_ptr->alg = alg_impossible;
2946 entry_ptr->cost = *cost_limit;
2947 return;
2950 /* Cache the result. */
2951 if (!cache_hit)
2953 entry_ptr->t = t;
2954 entry_ptr->mode = mode;
2955 entry_ptr->speed = speed;
2956 entry_ptr->alg = best_alg->op[best_alg->ops];
2957 entry_ptr->cost.cost = best_cost.cost;
2958 entry_ptr->cost.latency = best_cost.latency;
2961 /* If we are getting too long a sequence for `struct algorithm'
2962 to record, make this search fail. */
2963 if (best_alg->ops == MAX_BITS_PER_WORD)
2964 return;
2966 /* Copy the algorithm from temporary space to the space at alg_out.
2967 We avoid using structure assignment because the majority of
2968 best_alg is normally undefined, and this is a critical function. */
2969 alg_out->ops = best_alg->ops + 1;
2970 alg_out->cost = best_cost;
2971 memcpy (alg_out->op, best_alg->op,
2972 alg_out->ops * sizeof *alg_out->op);
2973 memcpy (alg_out->log, best_alg->log,
2974 alg_out->ops * sizeof *alg_out->log);
2977 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2978 Try three variations:
2980 - a shift/add sequence based on VAL itself
2981 - a shift/add sequence based on -VAL, followed by a negation
2982 - a shift/add sequence based on VAL - 1, followed by an addition.
2984 Return true if the cheapest of these cost less than MULT_COST,
2985 describing the algorithm in *ALG and final fixup in *VARIANT. */
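/* Illustrative comparison of the three variants for VAL = 7: the
   sequence based on VAL itself is (x << 3) - x; the negated variant
   synthesizes -7 as x - (x << 3) and then negates; the add variant
   synthesizes VAL - 1 = 6, for instance as ((x << 1) + x) << 1, and
   then adds x.  Whichever is cheapest under the target's cost model
   is the one described in *ALG and *VARIANT.  */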
2987 bool
2988 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2989 struct algorithm *alg, enum mult_variant *variant,
2990 int mult_cost)
2992 struct algorithm alg2;
2993 struct mult_cost limit;
2994 int op_cost;
2995 bool speed = optimize_insn_for_speed_p ();
2997 /* Fail quickly for impossible bounds. */
2998 if (mult_cost < 0)
2999 return false;
3001 /* Ensure that mult_cost provides a reasonable upper bound.
3002 Any constant multiplication can be performed with less
3003 than 2 * bits additions. */
3004 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3005 if (mult_cost > op_cost)
3006 mult_cost = op_cost;
3008 *variant = basic_variant;
3009 limit.cost = mult_cost;
3010 limit.latency = mult_cost;
3011 synth_mult (alg, val, &limit, mode);
3013 /* This works only if the inverted value actually fits in an
3014 `unsigned int'. */
3015 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3017 op_cost = neg_cost (speed, mode);
3018 if (MULT_COST_LESS (&alg->cost, mult_cost))
3020 limit.cost = alg->cost.cost - op_cost;
3021 limit.latency = alg->cost.latency - op_cost;
3023 else
3025 limit.cost = mult_cost - op_cost;
3026 limit.latency = mult_cost - op_cost;
3029 synth_mult (&alg2, -val, &limit, mode);
3030 alg2.cost.cost += op_cost;
3031 alg2.cost.latency += op_cost;
3032 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3033 *alg = alg2, *variant = negate_variant;
3036 /* This proves very useful for division-by-constant. */
3037 op_cost = add_cost (speed, mode);
3038 if (MULT_COST_LESS (&alg->cost, mult_cost))
3040 limit.cost = alg->cost.cost - op_cost;
3041 limit.latency = alg->cost.latency - op_cost;
3043 else
3045 limit.cost = mult_cost - op_cost;
3046 limit.latency = mult_cost - op_cost;
3049 synth_mult (&alg2, val - 1, &limit, mode);
3050 alg2.cost.cost += op_cost;
3051 alg2.cost.latency += op_cost;
3052 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3053 *alg = alg2, *variant = add_variant;
3055 return MULT_COST_LESS (&alg->cost, mult_cost);
3058 /* A subroutine of expand_mult, used for constant multiplications.
3059 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3060 convenient. Use the shift/add sequence described by ALG and apply
3061 the final fixup specified by VARIANT. */
3063 static rtx
3064 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3065 rtx target, const struct algorithm *alg,
3066 enum mult_variant variant)
3068 unsigned HOST_WIDE_INT val_so_far;
3069 rtx_insn *insn;
3070 rtx accum, tem;
3071 int opno;
3072 machine_mode nmode;
3074 /* Avoid referencing memory over and over, and avoid invalid sharing
3075 on SUBREGs. */
3076 op0 = force_reg (mode, op0);
3078 /* ACCUM starts out either as OP0 or as a zero, depending on
3079 the first operation. */
3081 if (alg->op[0] == alg_zero)
3083 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3084 val_so_far = 0;
3086 else if (alg->op[0] == alg_m)
3088 accum = copy_to_mode_reg (mode, op0);
3089 val_so_far = 1;
3091 else
3092 gcc_unreachable ();
3094 for (opno = 1; opno < alg->ops; opno++)
3096 int log = alg->log[opno];
3097 rtx shift_subtarget = optimize ? 0 : accum;
3098 rtx add_target
3099 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3100 && !optimize)
3101 ? target : 0;
3102 rtx accum_target = optimize ? 0 : accum;
3103 rtx accum_inner;
3105 switch (alg->op[opno])
3107 case alg_shift:
3108 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3109 /* REG_EQUAL note will be attached to the following insn. */
3110 emit_move_insn (accum, tem);
3111 val_so_far <<= log;
3112 break;
3114 case alg_add_t_m2:
3115 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3116 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3117 add_target ? add_target : accum_target);
3118 val_so_far += HOST_WIDE_INT_1U << log;
3119 break;
3121 case alg_sub_t_m2:
3122 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3123 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3124 add_target ? add_target : accum_target);
3125 val_so_far -= HOST_WIDE_INT_1U << log;
3126 break;
3128 case alg_add_t2_m:
3129 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3130 log, shift_subtarget, 0);
3131 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3132 add_target ? add_target : accum_target);
3133 val_so_far = (val_so_far << log) + 1;
3134 break;
3136 case alg_sub_t2_m:
3137 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3138 log, shift_subtarget, 0);
3139 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3140 add_target ? add_target : accum_target);
3141 val_so_far = (val_so_far << log) - 1;
3142 break;
3144 case alg_add_factor:
3145 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3146 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3147 add_target ? add_target : accum_target);
3148 val_so_far += val_so_far << log;
3149 break;
3151 case alg_sub_factor:
3152 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3153 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3154 (add_target
3155 ? add_target : (optimize ? 0 : tem)));
3156 val_so_far = (val_so_far << log) - val_so_far;
3157 break;
3159 default:
3160 gcc_unreachable ();
3163 if (SCALAR_INT_MODE_P (mode))
3165 /* Write a REG_EQUAL note on the last insn so that we can cse
3166 multiplication sequences. Note that if ACCUM is a SUBREG,
3167 we've set the inner register and must properly indicate that. */
3168 tem = op0, nmode = mode;
3169 accum_inner = accum;
3170 if (GET_CODE (accum) == SUBREG)
3172 accum_inner = SUBREG_REG (accum);
3173 nmode = GET_MODE (accum_inner);
3174 tem = gen_lowpart (nmode, op0);
3177 insn = get_last_insn ();
3178 set_dst_reg_note (insn, REG_EQUAL,
3179 gen_rtx_MULT (nmode, tem,
3180 gen_int_mode (val_so_far, nmode)),
3181 accum_inner);
3185 if (variant == negate_variant)
3187 val_so_far = -val_so_far;
3188 accum = expand_unop (mode, neg_optab, accum, target, 0);
3190 else if (variant == add_variant)
3192 val_so_far = val_so_far + 1;
3193 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3196 /* Compare only the bits of val and val_so_far that are significant
3197 in the result mode, to avoid sign-/zero-extension confusion. */
3198 nmode = GET_MODE_INNER (mode);
3199 val &= GET_MODE_MASK (nmode);
3200 val_so_far &= GET_MODE_MASK (nmode);
3201 gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3203 return accum;
3206 /* Perform a multiplication and return an rtx for the result.
3207 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3208 TARGET is a suggestion for where to store the result (an rtx).
3210 We check specially for a constant integer as OP1.
3211 If you want this check for OP0 as well, then before calling
3212 you should swap the two operands if OP0 would be constant. */
3215 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3216 int unsignedp)
3218 enum mult_variant variant;
3219 struct algorithm algorithm;
3220 rtx scalar_op1;
3221 int max_cost;
3222 bool speed = optimize_insn_for_speed_p ();
3223 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3225 if (CONSTANT_P (op0))
3226 std::swap (op0, op1);
3228 /* For vectors, there are several simplifications that can be made if
3229 all elements of the vector constant are identical. */
3230 scalar_op1 = unwrap_const_vec_duplicate (op1);
3232 if (INTEGRAL_MODE_P (mode))
3234 rtx fake_reg;
3235 HOST_WIDE_INT coeff;
3236 bool is_neg;
3237 int mode_bitsize;
3239 if (op1 == CONST0_RTX (mode))
3240 return op1;
3241 if (op1 == CONST1_RTX (mode))
3242 return op0;
3243 if (op1 == CONSTM1_RTX (mode))
3244 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3245 op0, target, 0);
3247 if (do_trapv)
3248 goto skip_synth;
3250 /* If mode is an integer vector mode, check if the backend supports
3251 vector lshift (by scalar or vector) at all. If not, we can't use
3252 a synthesized multiply. */
3253 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3254 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3255 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3256 goto skip_synth;
3258 /* These are the operations that are potentially turned into
3259 a sequence of shifts and additions. */
3260 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3262 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3263 less than or equal in size to `unsigned int' this doesn't matter.
3264 If the mode is larger than `unsigned int', then synth_mult works
3265 only if the constant value exactly fits in an `unsigned int' without
3266 any truncation. This means that multiplying by negative values does
3267 not work; results are off by 2^32 on a 32-bit machine. */
3268 if (CONST_INT_P (scalar_op1))
3270 coeff = INTVAL (scalar_op1);
3271 is_neg = coeff < 0;
3273 #if TARGET_SUPPORTS_WIDE_INT
3274 else if (CONST_WIDE_INT_P (scalar_op1))
3275 #else
3276 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3277 #endif
3279 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3280 /* Perfect power of 2 (other than 1, which is handled above). */
3281 if (shift > 0)
3282 return expand_shift (LSHIFT_EXPR, mode, op0,
3283 shift, target, unsignedp);
3284 else
3285 goto skip_synth;
3287 else
3288 goto skip_synth;
3290 /* We used to test optimize here, on the grounds that it's better to
3291 produce a smaller program when -O is not used. But this causes
3292 such a terrible slowdown sometimes that it seems better to always
3293 use synth_mult. */
3295 /* Special case powers of two. */
3296 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3297 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3298 return expand_shift (LSHIFT_EXPR, mode, op0,
3299 floor_log2 (coeff), target, unsignedp);
3301 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3303 /* Attempt to handle multiplication of DImode values by negative
3304 coefficients, by performing the multiplication by a positive
3305 multiplier and then inverting the result. */
3306 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3308 /* It's safe to use -coeff even for INT_MIN, as the
3309 result is interpreted as an unsigned coefficient.
3310 Exclude cost of op0 from max_cost to match the cost
3311 calculation of the synth_mult. */
3312 coeff = -(unsigned HOST_WIDE_INT) coeff;
3313 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3314 mode, speed)
3315 - neg_cost (speed, mode));
3316 if (max_cost <= 0)
3317 goto skip_synth;
3319 /* Special case powers of two. */
3320 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3322 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3323 floor_log2 (coeff), target, unsignedp);
3324 return expand_unop (mode, neg_optab, temp, target, 0);
3327 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3328 max_cost))
3330 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3331 &algorithm, variant);
3332 return expand_unop (mode, neg_optab, temp, target, 0);
3334 goto skip_synth;
3337 /* Exclude cost of op0 from max_cost to match the cost
3338 calculation of the synth_mult. */
3339 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3340 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3341 return expand_mult_const (mode, op0, coeff, target,
3342 &algorithm, variant);
3344 skip_synth:
3346 /* Expand x*2.0 as x+x. */
3347 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3348 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3350 op0 = force_reg (GET_MODE (op0), op0);
3351 return expand_binop (mode, add_optab, op0, op0,
3352 target, unsignedp, OPTAB_LIB_WIDEN);
3355 /* This used to use umul_optab if unsigned, but for non-widening multiply
3356 there is no difference between signed and unsigned. */
3357 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3358 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3359 gcc_assert (op0);
3360 return op0;
3363 /* Return a cost estimate for multiplying a register by the given
3364 COEFFicient in the given MODE and SPEED. */
3367 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3369 int max_cost;
3370 struct algorithm algorithm;
3371 enum mult_variant variant;
3373 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3374 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3375 mode, speed);
3376 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3377 return algorithm.cost.cost;
3378 else
3379 return max_cost;
3382 /* Perform a widening multiplication and return an rtx for the result.
3383 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3384 TARGET is a suggestion for where to store the result (an rtx).
3385 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3386 or smul_widen_optab.
3388 We check specially for a constant integer as OP1, comparing the
3389 cost of a widening multiply against the cost of a sequence of shifts
3390 and adds. */
3393 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3394 int unsignedp, optab this_optab)
3396 bool speed = optimize_insn_for_speed_p ();
3397 rtx cop1;
3399 if (CONST_INT_P (op1)
3400 && GET_MODE (op0) != VOIDmode
3401 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3402 this_optab == umul_widen_optab))
3403 && CONST_INT_P (cop1)
3404 && (INTVAL (cop1) >= 0
3405 || HWI_COMPUTABLE_MODE_P (mode)))
3407 HOST_WIDE_INT coeff = INTVAL (cop1);
3408 int max_cost;
3409 enum mult_variant variant;
3410 struct algorithm algorithm;
3412 if (coeff == 0)
3413 return CONST0_RTX (mode);
3415 /* Special case powers of two. */
3416 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3418 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3419 return expand_shift (LSHIFT_EXPR, mode, op0,
3420 floor_log2 (coeff), target, unsignedp);
3423 /* Exclude cost of op0 from max_cost to match the cost
3424 calculation of the synth_mult. */
3425 max_cost = mul_widen_cost (speed, mode);
3426 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3427 max_cost))
3429 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3430 return expand_mult_const (mode, op0, coeff, target,
3431 &algorithm, variant);
3434 return expand_binop (mode, this_optab, op0, op1, target,
3435 unsignedp, OPTAB_LIB_WIDEN);
3438 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3439 replace division by D, and put the least significant N bits of the result
3440 in *MULTIPLIER_PTR and return the most significant bit.
3442 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3443 needed precision is in PRECISION (should be <= N).
3445 PRECISION should be as small as possible so this function can choose
3446 the multiplier more freely.
3448 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3449 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3451 Using this function, x/D will be equal to (x * m) >> N >> (*POST_SHIFT_PTR),
3452 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
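/* Worked example, for N = 32, PRECISION = 32 and D = 3: the function
   sets *MULTIPLIER_PTR to 0xAAAAAAAB, *POST_SHIFT_PTR to 1 and
   *LGUP_PTR to 2, and returns 0; an unsigned 32-bit X then satisfies
   X / 3 == (X * 0xAAAAAAAB) >> 32 >> 1 when the product is computed
   in 64-bit arithmetic.  */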
3454 unsigned HOST_WIDE_INT
3455 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3456 unsigned HOST_WIDE_INT *multiplier_ptr,
3457 int *post_shift_ptr, int *lgup_ptr)
3459 int lgup, post_shift;
3460 int pow, pow2;
3462 /* lgup = ceil(log2(divisor)); */
3463 lgup = ceil_log2 (d);
3465 gcc_assert (lgup <= n);
3467 pow = n + lgup;
3468 pow2 = n + lgup - precision;
3470 /* mlow = 2^(N + lgup)/d */
3471 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3472 wide_int mlow = wi::udiv_trunc (val, d);
3474 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3475 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3476 wide_int mhigh = wi::udiv_trunc (val, d);
3478 /* If precision == N, then mlow, mhigh exceed 2^N
3479 (but they do not exceed 2^(N+1)). */
3481 /* Reduce to lowest terms. */
3482 for (post_shift = lgup; post_shift > 0; post_shift--)
3484 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3485 HOST_BITS_PER_WIDE_INT);
3486 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3487 HOST_BITS_PER_WIDE_INT);
3488 if (ml_lo >= mh_lo)
3489 break;
3491 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3492 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3495 *post_shift_ptr = post_shift;
3496 *lgup_ptr = lgup;
3497 if (n < HOST_BITS_PER_WIDE_INT)
3499 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3500 *multiplier_ptr = mhigh.to_uhwi () & mask;
3501 return mhigh.to_uhwi () >= mask;
3503 else
3505 *multiplier_ptr = mhigh.to_uhwi ();
3506 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3510 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3511 congruent to 1 (mod 2**N). */
3513 static unsigned HOST_WIDE_INT
3514 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3516 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3518 /* The algorithm notes that the choice y = x satisfies
3519 x*y == 1 mod 2^3, since x is assumed odd.
3520 Each iteration doubles the number of bits of significance in y. */
3522 unsigned HOST_WIDE_INT mask;
3523 unsigned HOST_WIDE_INT y = x;
3524 int nbit = 3;
3526 mask = (n == HOST_BITS_PER_WIDE_INT
3527 ? HOST_WIDE_INT_M1U
3528 : (HOST_WIDE_INT_1U << n) - 1);
3530 while (nbit < n)
3532 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3533 nbit *= 2;
3535 return y;
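/* Editorial sketch (not part of expmed.c): the same Newton iteration as a
   standalone function, assuming a 32-bit unsigned int.  Each step doubles
   the number of correct low-order bits, so four steps suffice for 32 bits.
   For example the inverse of 7 comes out as 0xB6DB6DB7, and indeed
   7 * 0xB6DB6DB7 == 5 * 2^32 + 1.  */
static inline unsigned int
invert_mod2_32_sketch (unsigned int x)  /* X must be odd.  */
{
  unsigned int y = x;                   /* Correct in the low 3 bits.  */
  for (int nbit = 3; nbit < 32; nbit *= 2)
    y = y * (2 - x * y);                /* Unsigned arithmetic wraps mod 2^32.  */
  return y;
}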
3538 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
3539 that was carried out with the wrong signedness. ADJ_OPERAND is already
3540 the high half of the product OP0 x OP1. If UNSIGNEDP is nonzero, adjust
3541 the signed product to become unsigned; if UNSIGNEDP is zero, adjust the
3542 unsigned product to become signed.
3544 The result is put in TARGET if that is convenient.
3546 MODE is the mode of operation. */
3549 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3550 rtx op1, rtx target, int unsignedp)
3552 rtx tem;
3553 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3555 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3556 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3557 tem = expand_and (mode, tem, op1, NULL_RTX);
3558 adj_operand
3559 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3560 adj_operand);
3562 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3563 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3564 tem = expand_and (mode, tem, op0, NULL_RTX);
3565 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3566 target);
3568 return target;
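/* Editorial sketch (not part of expmed.c): the identity behind the
   adjustment above, written out for 32-bit operands (the names and types
   here are illustrative only).  Working mod 2^32,
     mulhu (a, b) == mulhs (a, b) + (a < 0 ? b : 0) + (b < 0 ? a : 0)
   so a high-part product computed with the wrong signedness is fixed up
   with two arithmetic shifts, two ANDs and two adds or subtracts.  */
static inline unsigned int
mulhs_from_mulhu_sketch (int a, int b)
{
  unsigned int ua = (unsigned int) a, ub = (unsigned int) b;
  unsigned int hu = (unsigned int) (((unsigned long long) ua * ub) >> 32);
  /* Subtract B where A is negative and A where B is negative.  */
  return hu - (a < 0 ? ub : 0u) - (b < 0 ? ua : 0u);
}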
3571 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3573 static rtx
3574 extract_high_half (machine_mode mode, rtx op)
3576 machine_mode wider_mode;
3578 if (mode == word_mode)
3579 return gen_highpart (mode, op);
3581 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3583 wider_mode = GET_MODE_WIDER_MODE (mode);
3584 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3585 GET_MODE_BITSIZE (mode), 0, 1);
3586 return convert_modes (mode, wider_mode, op, 0);
3589 /* Like expmed_mult_highpart, but only consider using a multiplication
3590 optab. OP1 is an rtx for the constant operand. */
3592 static rtx
3593 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3594 rtx target, int unsignedp, int max_cost)
3596 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3597 machine_mode wider_mode;
3598 optab moptab;
3599 rtx tem;
3600 int size;
3601 bool speed = optimize_insn_for_speed_p ();
3603 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3605 wider_mode = GET_MODE_WIDER_MODE (mode);
3606 size = GET_MODE_BITSIZE (mode);
3608 /* Firstly, try using a multiplication insn that only generates the needed
3609 high part of the product, and in the sign flavor of unsignedp. */
3610 if (mul_highpart_cost (speed, mode) < max_cost)
3612 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3613 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3614 unsignedp, OPTAB_DIRECT);
3615 if (tem)
3616 return tem;
3619 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3620 Need to adjust the result after the multiplication. */
3621 if (size - 1 < BITS_PER_WORD
3622 && (mul_highpart_cost (speed, mode)
3623 + 2 * shift_cost (speed, mode, size-1)
3624 + 4 * add_cost (speed, mode) < max_cost))
3626 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3627 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3628 unsignedp, OPTAB_DIRECT);
3629 if (tem)
3630 /* We used the wrong signedness. Adjust the result. */
3631 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3632 tem, unsignedp);
3635 /* Try widening multiplication. */
3636 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3637 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3638 && mul_widen_cost (speed, wider_mode) < max_cost)
3640 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3641 unsignedp, OPTAB_WIDEN);
3642 if (tem)
3643 return extract_high_half (mode, tem);
3646 /* Try widening the mode and perform a non-widening multiplication. */
3647 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3648 && size - 1 < BITS_PER_WORD
3649 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3650 < max_cost))
3652 rtx_insn *insns;
3653 rtx wop0, wop1;
3655 /* We need to widen the operands, for example to ensure the
3656 constant multiplier is correctly sign- or zero-extended.
3657 Use a sequence to clean up any instructions emitted by
3658 the conversions if things don't work out. */
3659 start_sequence ();
3660 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3661 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3662 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3663 unsignedp, OPTAB_WIDEN);
3664 insns = get_insns ();
3665 end_sequence ();
3667 if (tem)
3669 emit_insn (insns);
3670 return extract_high_half (mode, tem);
3674 /* Try widening multiplication of opposite signedness, and adjust. */
3675 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3676 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3677 && size - 1 < BITS_PER_WORD
3678 && (mul_widen_cost (speed, wider_mode)
3679 + 2 * shift_cost (speed, mode, size-1)
3680 + 4 * add_cost (speed, mode) < max_cost))
3682 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3683 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3684 if (tem != 0)
3686 tem = extract_high_half (mode, tem);
3687 /* We used the wrong signedness. Adjust the result. */
3688 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3689 target, unsignedp);
3693 return 0;
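/* Editorial sketch (not part of expmed.c): the widening-multiply strategy
   tried above, written out for a 32-bit mode on a host with a 64-bit
   unsigned long long; the high half of the double-width product is the
   desired result.  */
static inline unsigned int
mult_highpart_widen_sketch (unsigned int a, unsigned int b)
{
  return (unsigned int) (((unsigned long long) a * b) >> 32);
}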
3696 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3697 putting the high half of the result in TARGET if that is convenient,
3698 and returning where the result is. If the operation cannot be performed,
3699 0 is returned.
3701 MODE is the mode of operation and result.
3703 UNSIGNEDP nonzero means unsigned multiply.
3705 MAX_COST is the total allowed cost for the expanded RTL. */
3707 static rtx
3708 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3709 rtx target, int unsignedp, int max_cost)
3711 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3712 unsigned HOST_WIDE_INT cnst1;
3713 int extra_cost;
3714 bool sign_adjust = false;
3715 enum mult_variant variant;
3716 struct algorithm alg;
3717 rtx tem;
3718 bool speed = optimize_insn_for_speed_p ();
3720 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3721 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3722 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3724 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3726 /* We can't optimize modes wider than BITS_PER_WORD.
3727 ??? We might be able to perform double-word arithmetic if
3728 mode == word_mode, however all the cost calculations in
3729 synth_mult etc. assume single-word operations. */
3730 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3731 return expmed_mult_highpart_optab (mode, op0, op1, target,
3732 unsignedp, max_cost);
3734 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3736 /* Check whether we try to multiply by a negative constant. */
3737 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3739 sign_adjust = true;
3740 extra_cost += add_cost (speed, mode);
3743 /* See whether shift/add multiplication is cheap enough. */
3744 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3745 max_cost - extra_cost))
3747 /* See whether the specialized multiplication optabs are
3748 cheaper than the shift/add version. */
3749 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3750 alg.cost.cost + extra_cost);
3751 if (tem)
3752 return tem;
3754 tem = convert_to_mode (wider_mode, op0, unsignedp);
3755 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3756 tem = extract_high_half (mode, tem);
3758 /* Adjust result for signedness. */
3759 if (sign_adjust)
3760 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3762 return tem;
3764 return expmed_mult_highpart_optab (mode, op0, op1, target,
3765 unsignedp, max_cost);
3769 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3771 static rtx
3772 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3774 rtx result, temp, shift;
3775 rtx_code_label *label;
3776 int logd;
3777 int prec = GET_MODE_PRECISION (mode);
3779 logd = floor_log2 (d);
3780 result = gen_reg_rtx (mode);
3782 /* Avoid conditional branches when they're expensive. */
3783 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3784 && optimize_insn_for_speed_p ())
3786 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3787 mode, 0, -1);
3788 if (signmask)
3790 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3791 signmask = force_reg (mode, signmask);
3792 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3794 /* Use the rtx_cost of an LSHIFTRT instruction to determine
3795 which instruction sequence to use. If logical right shifts
3796 are expensive then use 2 XORs, 2 SUBs and an AND; otherwise
3797 use an LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3799 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3800 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3801 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3802 > COSTS_N_INSNS (2)))
3804 temp = expand_binop (mode, xor_optab, op0, signmask,
3805 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3806 temp = expand_binop (mode, sub_optab, temp, signmask,
3807 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3808 temp = expand_binop (mode, and_optab, temp,
3809 gen_int_mode (masklow, mode),
3810 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3811 temp = expand_binop (mode, xor_optab, temp, signmask,
3812 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3813 temp = expand_binop (mode, sub_optab, temp, signmask,
3814 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3816 else
3818 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3819 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3820 signmask = force_reg (mode, signmask);
3822 temp = expand_binop (mode, add_optab, op0, signmask,
3823 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3824 temp = expand_binop (mode, and_optab, temp,
3825 gen_int_mode (masklow, mode),
3826 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3827 temp = expand_binop (mode, sub_optab, temp, signmask,
3828 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3830 return temp;
3834 /* Mask contains the mode's signbit and the significant bits of the
3835 modulus. By including the signbit in the operation, many targets
3836 can avoid an explicit compare instruction for the following comparison
3837 against zero. */
3838 wide_int mask = wi::mask (logd, false, prec);
3839 mask = wi::set_bit (mask, prec - 1);
3841 temp = expand_binop (mode, and_optab, op0,
3842 immed_wide_int_const (mask, mode),
3843 result, 1, OPTAB_LIB_WIDEN);
3844 if (temp != result)
3845 emit_move_insn (result, temp);
3847 label = gen_label_rtx ();
3848 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3850 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3851 0, OPTAB_LIB_WIDEN);
3853 mask = wi::mask (logd, true, prec);
3854 temp = expand_binop (mode, ior_optab, temp,
3855 immed_wide_int_const (mask, mode),
3856 result, 1, OPTAB_LIB_WIDEN);
3857 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3858 0, OPTAB_LIB_WIDEN);
3859 if (temp != result)
3860 emit_move_insn (result, temp);
3861 emit_label (label);
3862 return result;
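/* Editorial sketch (not part of expmed.c): the cheap-shift variant of the
   branch-free sequence above, written out for a 32-bit int and d == 2^logd
   with 1 <= logd < 32.  Assumes >> of a negative int is an arithmetic
   shift, as GCC itself guarantees.  signmask is d - 1 for negative X and 0
   otherwise, so the add/and/subtract yields the truncating (C-style)
   remainder without a branch.  */
static inline int
smod_pow2_sketch (int x, int logd)
{
  unsigned int mask = (1u << logd) - 1;
  unsigned int signmask = (unsigned int) (x >> 31) >> (32 - logd);
  return (int) ((((unsigned int) x + signmask) & mask) - signmask);
}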
3865 /* Expand signed division of OP0 by a power of two D in mode MODE.
3866 This routine is only called for positive values of D. */
3868 static rtx
3869 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3871 rtx temp;
3872 rtx_code_label *label;
3873 int logd;
3875 logd = floor_log2 (d);
3877 if (d == 2
3878 && BRANCH_COST (optimize_insn_for_speed_p (),
3879 false) >= 1)
3881 temp = gen_reg_rtx (mode);
3882 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3883 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3884 0, OPTAB_LIB_WIDEN);
3885 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3888 if (HAVE_conditional_move
3889 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3891 rtx temp2;
3893 start_sequence ();
3894 temp2 = copy_to_mode_reg (mode, op0);
3895 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3896 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3897 temp = force_reg (mode, temp);
3899 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3900 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3901 mode, temp, temp2, mode, 0);
3902 if (temp2)
3904 rtx_insn *seq = get_insns ();
3905 end_sequence ();
3906 emit_insn (seq);
3907 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3909 end_sequence ();
3912 if (BRANCH_COST (optimize_insn_for_speed_p (),
3913 false) >= 2)
3915 int ushift = GET_MODE_BITSIZE (mode) - logd;
3917 temp = gen_reg_rtx (mode);
3918 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3919 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3920 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3921 > COSTS_N_INSNS (1))
3922 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3923 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3924 else
3925 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3926 ushift, NULL_RTX, 1);
3927 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3928 0, OPTAB_LIB_WIDEN);
3929 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3932 label = gen_label_rtx ();
3933 temp = copy_to_mode_reg (mode, op0);
3934 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3935 expand_inc (temp, gen_int_mode (d - 1, mode));
3936 emit_label (label);
3937 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
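/* Editorial sketch (not part of expmed.c): every variant above implements
   the same identity for d == 2^logd > 0, namely that truncating signed
   division needs the dividend biased by d - 1 when it is negative:
     x / d == (x + (x < 0 ? d - 1 : 0)) >> logd   (arithmetic shift)
   Written out for a 32-bit int and 1 <= logd <= 30, assuming >> of a
   negative int is an arithmetic shift.  */
static inline int
sdiv_pow2_sketch (int x, int logd)
{
  int bias = x < 0 ? (1 << logd) - 1 : 0;
  return (x + bias) >> logd;
}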
3940 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3941 if that is convenient, and returning where the result is.
3942 You may request either the quotient or the remainder as the result;
3943 specify REM_FLAG nonzero to get the remainder.
3945 CODE is the expression code for which kind of division this is;
3946 it controls how rounding is done. MODE is the machine mode to use.
3947 UNSIGNEDP nonzero means do unsigned division. */
3949 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3950 and then correct it by or'ing in missing high bits
3951 if result of ANDI is nonzero.
3952 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3953 This could optimize to a bfexts instruction.
3954 But C doesn't use these operations, so their optimizations are
3955 left for later. */
3956 /* ??? For modulo, we don't actually need the highpart of the first product,
3957 the low part will do nicely. And for small divisors, the second multiply
3958 can also be a low-part only multiply or even be completely left out.
3959 E.g. to calculate the remainder of a division by 3 with a 32 bit
3960 multiply, multiply with 0x55555556 and extract the upper two bits;
3961 the result is exact for inputs up to 0x1fffffff.
3962 The input range can be reduced by using cross-sum rules.
3963 For odd divisors >= 3, the following table gives right shift counts
3964 so that if a number is shifted by an integer multiple of the given
3965 amount, the remainder stays the same:
3966 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3967 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3968 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3969 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3970 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3972 Cross-sum rules for even numbers can be derived by leaving as many bits
3973 to the right alone as the divisor has zeros to the right.
3974 E.g. if x is an unsigned 32 bit number:
3975 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
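/* Editorial sketch (not part of expmed.c): the remainder-by-3 example from
   the note above, for a 32-bit unsigned int.  0x55555556 is (2^32 + 2) / 3,
   and the top two bits of the low 32 bits of x * 0x55555556 equal x mod 3
   exactly for x <= 0x1fffffff.  */
static inline unsigned int
umod3_sketch (unsigned int x)           /* Valid for x <= 0x1fffffff.  */
{
  return (x * 0x55555556u) >> 30;
}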
3979 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3980 rtx op0, rtx op1, rtx target, int unsignedp)
3982 machine_mode compute_mode;
3983 rtx tquotient;
3984 rtx quotient = 0, remainder = 0;
3985 rtx_insn *last;
3986 int size;
3987 rtx_insn *insn;
3988 optab optab1, optab2;
3989 int op1_is_constant, op1_is_pow2 = 0;
3990 int max_cost, extra_cost;
3991 static HOST_WIDE_INT last_div_const = 0;
3992 bool speed = optimize_insn_for_speed_p ();
3994 op1_is_constant = CONST_INT_P (op1);
3995 if (op1_is_constant)
3997 wide_int ext_op1 = rtx_mode_t (op1, mode);
3998 op1_is_pow2 = (wi::popcount (ext_op1) == 1
3999 || (! unsignedp
4000 && wi::popcount (wi::neg (ext_op1)) == 1));
4004 This is the structure of expand_divmod:
4006 First comes code to fix up the operands so we can perform the operations
4007 correctly and efficiently.
4009 Second comes a switch statement with code specific to each rounding mode.
4010 For some special operands this code emits all RTL for the desired
4011 operation; for other cases it generates only a quotient and stores it in
4012 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4013 to indicate that it has not done anything.
4015 Last comes code that finishes the operation. If QUOTIENT is set and
4016 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4017 QUOTIENT is not set, it is computed using trunc rounding.
4019 We try to generate special code for division and remainder when OP1 is a
4020 constant. If |OP1| = 2**n we can use shifts and some other fast
4021 operations. For other values of OP1, we compute a carefully selected
4022 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4023 by m.
4025 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4026 half of the product. Different strategies for generating the product are
4027 implemented in expmed_mult_highpart.
4029 If what we actually want is the remainder, we generate that by another
4030 by-constant multiplication and a subtraction. */
4032 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4033 code below will malfunction if we are, so check here and handle
4034 the special case if so. */
4035 if (op1 == const1_rtx)
4036 return rem_flag ? const0_rtx : op0;
4038 /* When dividing by -1, we could get an overflow.
4039 negv_optab can handle overflows. */
4040 if (! unsignedp && op1 == constm1_rtx)
4042 if (rem_flag)
4043 return const0_rtx;
4044 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4045 ? negv_optab : neg_optab, op0, target, 0);
4048 if (target
4049 /* Don't use the function value register as a target
4050 since we have to read it as well as write it,
4051 and function-inlining gets confused by this. */
4052 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4053 /* Don't clobber an operand while doing a multi-step calculation. */
4054 || ((rem_flag || op1_is_constant)
4055 && (reg_mentioned_p (target, op0)
4056 || (MEM_P (op0) && MEM_P (target))))
4057 || reg_mentioned_p (target, op1)
4058 || (MEM_P (op1) && MEM_P (target))))
4059 target = 0;
4061 /* Get the mode in which to perform this computation. Normally it will
4062 be MODE, but sometimes we can't do the desired operation in MODE.
4063 If so, pick a wider mode in which we can do the operation. Convert
4064 to that mode at the start to avoid repeated conversions.
4066 First see what operations we need. These depend on the expression
4067 we are evaluating. (We assume that divxx3 insns exist under the
4068 same conditions as modxx3 insns do, and that these insns don't normally
4069 fail. If these assumptions are not correct, we may generate less
4070 efficient code in some cases.)
4072 Then see if we find a mode in which we can open-code that operation
4073 (either a division, modulus, or shift). Finally, check for the smallest
4074 mode for which we can do the operation with a library call. */
4076 /* We might want to refine this now that we have division-by-constant
4077 optimization. Since expmed_mult_highpart tries so many variants, it is
4078 not straightforward to generalize this. Maybe we should make an array
4079 of possible modes in init_expmed? Save this for GCC 2.7. */
4081 optab1 = (op1_is_pow2
4082 ? (unsignedp ? lshr_optab : ashr_optab)
4083 : (unsignedp ? udiv_optab : sdiv_optab));
4084 optab2 = (op1_is_pow2 ? optab1
4085 : (unsignedp ? udivmod_optab : sdivmod_optab));
4087 for (compute_mode = mode; compute_mode != VOIDmode;
4088 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4089 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4090 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4091 break;
4093 if (compute_mode == VOIDmode)
4094 for (compute_mode = mode; compute_mode != VOIDmode;
4095 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4096 if (optab_libfunc (optab1, compute_mode)
4097 || optab_libfunc (optab2, compute_mode))
4098 break;
4100 /* If we still couldn't find a mode, use MODE, but expand_binop will
4101 probably die. */
4102 if (compute_mode == VOIDmode)
4103 compute_mode = mode;
4105 if (target && GET_MODE (target) == compute_mode)
4106 tquotient = target;
4107 else
4108 tquotient = gen_reg_rtx (compute_mode);
4110 size = GET_MODE_BITSIZE (compute_mode);
4111 #if 0
4112 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4113 (mode), and thereby get better code when OP1 is a constant. Do that
4114 later. It will require going over all usages of SIZE below. */
4115 size = GET_MODE_BITSIZE (mode);
4116 #endif
4118 /* Only deduct something for a REM if the last divide done was
4119 for a different constant. Then set the constant of the last
4120 divide. */
4121 max_cost = (unsignedp
4122 ? udiv_cost (speed, compute_mode)
4123 : sdiv_cost (speed, compute_mode));
4124 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4125 && INTVAL (op1) == last_div_const))
4126 max_cost -= (mul_cost (speed, compute_mode)
4127 + add_cost (speed, compute_mode));
4129 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4131 /* Now convert to the best mode to use. */
4132 if (compute_mode != mode)
4134 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4135 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4137 /* convert_modes may have placed op1 into a register, so we
4138 must recompute the following. */
4139 op1_is_constant = CONST_INT_P (op1);
4140 if (op1_is_constant)
4142 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4143 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4144 || (! unsignedp
4145 && wi::popcount (wi::neg (ext_op1)) == 1));
4147 else
4148 op1_is_pow2 = 0;
4151 /* If one of the operands is a volatile MEM, copy it into a register. */
4153 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4154 op0 = force_reg (compute_mode, op0);
4155 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4156 op1 = force_reg (compute_mode, op1);
4158 /* If we need the remainder or if OP1 is constant, we need to
4159 put OP0 in a register in case it has any queued subexpressions. */
4160 if (rem_flag || op1_is_constant)
4161 op0 = force_reg (compute_mode, op0);
4163 last = get_last_insn ();
4165 /* Promote floor rounding to trunc rounding for unsigned operations. */
4166 if (unsignedp)
4168 if (code == FLOOR_DIV_EXPR)
4169 code = TRUNC_DIV_EXPR;
4170 if (code == FLOOR_MOD_EXPR)
4171 code = TRUNC_MOD_EXPR;
4172 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4173 code = TRUNC_DIV_EXPR;
4176 if (op1 != const0_rtx)
4177 switch (code)
4179 case TRUNC_MOD_EXPR:
4180 case TRUNC_DIV_EXPR:
4181 if (op1_is_constant)
4183 if (unsignedp)
4185 unsigned HOST_WIDE_INT mh, ml;
4186 int pre_shift, post_shift;
4187 int dummy;
4188 wide_int wd = rtx_mode_t (op1, compute_mode);
4189 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4191 if (wi::popcount (wd) == 1)
4193 pre_shift = floor_log2 (d);
4194 if (rem_flag)
4196 unsigned HOST_WIDE_INT mask
4197 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4198 remainder
4199 = expand_binop (compute_mode, and_optab, op0,
4200 gen_int_mode (mask, compute_mode),
4201 remainder, 1,
4202 OPTAB_LIB_WIDEN);
4203 if (remainder)
4204 return gen_lowpart (mode, remainder);
4206 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4207 pre_shift, tquotient, 1);
4209 else if (size <= HOST_BITS_PER_WIDE_INT)
4211 if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4213 /* Most significant bit of divisor is set; emit an scc
4214 insn. */
4215 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4216 compute_mode, 1, 1);
4218 else
4220 /* Find a suitable multiplier and right shift count
4221 instead of dividing by D. */
4223 mh = choose_multiplier (d, size, size,
4224 &ml, &post_shift, &dummy);
4226 /* If the suggested multiplier is more than SIZE bits,
4227 we can do better for even divisors, using an
4228 initial right shift. */
4229 if (mh != 0 && (d & 1) == 0)
4231 pre_shift = ctz_or_zero (d);
4232 mh = choose_multiplier (d >> pre_shift, size,
4233 size - pre_shift,
4234 &ml, &post_shift, &dummy);
4235 gcc_assert (!mh);
4237 else
4238 pre_shift = 0;
4240 if (mh != 0)
4242 rtx t1, t2, t3, t4;
4244 if (post_shift - 1 >= BITS_PER_WORD)
4245 goto fail1;
4247 extra_cost
4248 = (shift_cost (speed, compute_mode, post_shift - 1)
4249 + shift_cost (speed, compute_mode, 1)
4250 + 2 * add_cost (speed, compute_mode));
4251 t1 = expmed_mult_highpart
4252 (compute_mode, op0,
4253 gen_int_mode (ml, compute_mode),
4254 NULL_RTX, 1, max_cost - extra_cost);
4255 if (t1 == 0)
4256 goto fail1;
4257 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4258 op0, t1),
4259 NULL_RTX);
4260 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4261 t2, 1, NULL_RTX, 1);
4262 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4263 t1, t3),
4264 NULL_RTX);
4265 quotient = expand_shift
4266 (RSHIFT_EXPR, compute_mode, t4,
4267 post_shift - 1, tquotient, 1);
4269 else
4271 rtx t1, t2;
4273 if (pre_shift >= BITS_PER_WORD
4274 || post_shift >= BITS_PER_WORD)
4275 goto fail1;
4277 t1 = expand_shift
4278 (RSHIFT_EXPR, compute_mode, op0,
4279 pre_shift, NULL_RTX, 1);
4280 extra_cost
4281 = (shift_cost (speed, compute_mode, pre_shift)
4282 + shift_cost (speed, compute_mode, post_shift));
4283 t2 = expmed_mult_highpart
4284 (compute_mode, t1,
4285 gen_int_mode (ml, compute_mode),
4286 NULL_RTX, 1, max_cost - extra_cost);
4287 if (t2 == 0)
4288 goto fail1;
4289 quotient = expand_shift
4290 (RSHIFT_EXPR, compute_mode, t2,
4291 post_shift, tquotient, 1);
4295 else /* Mode too wide to use the tricky code */
4296 break;
4298 insn = get_last_insn ();
4299 if (insn != last)
4300 set_dst_reg_note (insn, REG_EQUAL,
4301 gen_rtx_UDIV (compute_mode, op0, op1),
4302 quotient);
4304 else /* TRUNC_DIV, signed */
4306 unsigned HOST_WIDE_INT ml;
4307 int lgup, post_shift;
4308 rtx mlr;
4309 HOST_WIDE_INT d = INTVAL (op1);
4310 unsigned HOST_WIDE_INT abs_d;
4312 /* Since d might be INT_MIN, we have to cast to
4313 unsigned HOST_WIDE_INT before negating to avoid
4314 undefined signed overflow. */
4315 abs_d = (d >= 0
4316 ? (unsigned HOST_WIDE_INT) d
4317 : - (unsigned HOST_WIDE_INT) d);
4319 /* n rem d = n rem -d */
4320 if (rem_flag && d < 0)
4322 d = abs_d;
4323 op1 = gen_int_mode (abs_d, compute_mode);
4326 if (d == 1)
4327 quotient = op0;
4328 else if (d == -1)
4329 quotient = expand_unop (compute_mode, neg_optab, op0,
4330 tquotient, 0);
4331 else if (size <= HOST_BITS_PER_WIDE_INT
4332 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4334 /* This case is not handled correctly below. */
4335 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4336 compute_mode, 1, 1);
4337 if (quotient == 0)
4338 goto fail1;
4340 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4341 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4342 && (rem_flag
4343 ? smod_pow2_cheap (speed, compute_mode)
4344 : sdiv_pow2_cheap (speed, compute_mode))
4345 /* We assume that the cheap metric is true if the
4346 optab has an expander for this mode. */
4347 && ((optab_handler ((rem_flag ? smod_optab
4348 : sdiv_optab),
4349 compute_mode)
4350 != CODE_FOR_nothing)
4351 || (optab_handler (sdivmod_optab,
4352 compute_mode)
4353 != CODE_FOR_nothing)))
4355 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)
4356 && (size <= HOST_BITS_PER_WIDE_INT
4357 || abs_d != (unsigned HOST_WIDE_INT) d))
4359 if (rem_flag)
4361 remainder = expand_smod_pow2 (compute_mode, op0, d);
4362 if (remainder)
4363 return gen_lowpart (mode, remainder);
4366 if (sdiv_pow2_cheap (speed, compute_mode)
4367 && ((optab_handler (sdiv_optab, compute_mode)
4368 != CODE_FOR_nothing)
4369 || (optab_handler (sdivmod_optab, compute_mode)
4370 != CODE_FOR_nothing)))
4371 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4372 compute_mode, op0,
4373 gen_int_mode (abs_d,
4374 compute_mode),
4375 NULL_RTX, 0);
4376 else
4377 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4379 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4380 negate the quotient. */
4381 if (d < 0)
4383 insn = get_last_insn ();
4384 if (insn != last
4385 && abs_d < (HOST_WIDE_INT_1U
4386 << (HOST_BITS_PER_WIDE_INT - 1)))
4387 set_dst_reg_note (insn, REG_EQUAL,
4388 gen_rtx_DIV (compute_mode, op0,
4389 gen_int_mode
4390 (abs_d,
4391 compute_mode)),
4392 quotient);
4394 quotient = expand_unop (compute_mode, neg_optab,
4395 quotient, quotient, 0);
4398 else if (size <= HOST_BITS_PER_WIDE_INT)
4400 choose_multiplier (abs_d, size, size - 1,
4401 &ml, &post_shift, &lgup);
4402 if (ml < HOST_WIDE_INT_1U << (size - 1))
4404 rtx t1, t2, t3;
4406 if (post_shift >= BITS_PER_WORD
4407 || size - 1 >= BITS_PER_WORD)
4408 goto fail1;
4410 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4411 + shift_cost (speed, compute_mode, size - 1)
4412 + add_cost (speed, compute_mode));
4413 t1 = expmed_mult_highpart
4414 (compute_mode, op0, gen_int_mode (ml, compute_mode),
4415 NULL_RTX, 0, max_cost - extra_cost);
4416 if (t1 == 0)
4417 goto fail1;
4418 t2 = expand_shift
4419 (RSHIFT_EXPR, compute_mode, t1,
4420 post_shift, NULL_RTX, 0);
4421 t3 = expand_shift
4422 (RSHIFT_EXPR, compute_mode, op0,
4423 size - 1, NULL_RTX, 0);
4424 if (d < 0)
4425 quotient
4426 = force_operand (gen_rtx_MINUS (compute_mode,
4427 t3, t2),
4428 tquotient);
4429 else
4430 quotient
4431 = force_operand (gen_rtx_MINUS (compute_mode,
4432 t2, t3),
4433 tquotient);
4435 else
4437 rtx t1, t2, t3, t4;
4439 if (post_shift >= BITS_PER_WORD
4440 || size - 1 >= BITS_PER_WORD)
4441 goto fail1;
4443 ml |= HOST_WIDE_INT_M1U << (size - 1);
4444 mlr = gen_int_mode (ml, compute_mode);
4445 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4446 + shift_cost (speed, compute_mode, size - 1)
4447 + 2 * add_cost (speed, compute_mode));
4448 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4449 NULL_RTX, 0,
4450 max_cost - extra_cost);
4451 if (t1 == 0)
4452 goto fail1;
4453 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4454 t1, op0),
4455 NULL_RTX);
4456 t3 = expand_shift
4457 (RSHIFT_EXPR, compute_mode, t2,
4458 post_shift, NULL_RTX, 0);
4459 t4 = expand_shift
4460 (RSHIFT_EXPR, compute_mode, op0,
4461 size - 1, NULL_RTX, 0);
4462 if (d < 0)
4463 quotient
4464 = force_operand (gen_rtx_MINUS (compute_mode,
4465 t4, t3),
4466 tquotient);
4467 else
4468 quotient
4469 = force_operand (gen_rtx_MINUS (compute_mode,
4470 t3, t4),
4471 tquotient);
4474 else /* Mode too wide to use the tricky code */
4475 break;
4477 insn = get_last_insn ();
4478 if (insn != last)
4479 set_dst_reg_note (insn, REG_EQUAL,
4480 gen_rtx_DIV (compute_mode, op0, op1),
4481 quotient);
4483 break;
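/* Editorial note (not part of expmed.c): a hand-worked instance of the
   ml >= 2^(size - 1) branch above, for size == 32 and d == 7.
   choose_multiplier (7, 32, 31, ...) yields ml == 0x92492493 and
   post_shift == 2, so the sequence emitted above computes
     q = ((mulhs (x, 0x92492493) + x) >> 2) - (x >> 31);
   e.g. x == 20 gives mulhs == -9, (-9 + 20) >> 2 == 2, and q == 2.  */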
4485 fail1:
4486 delete_insns_since (last);
4487 break;
4489 case FLOOR_DIV_EXPR:
4490 case FLOOR_MOD_EXPR:
4491 /* We will come here only for signed operations. */
4492 if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4494 unsigned HOST_WIDE_INT mh, ml;
4495 int pre_shift, lgup, post_shift;
4496 HOST_WIDE_INT d = INTVAL (op1);
4498 if (d > 0)
4500 /* We could just as easily deal with negative constants here,
4501 but it does not seem worth the trouble for GCC 2.6. */
4502 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4504 pre_shift = floor_log2 (d);
4505 if (rem_flag)
4507 unsigned HOST_WIDE_INT mask
4508 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4509 remainder = expand_binop
4510 (compute_mode, and_optab, op0,
4511 gen_int_mode (mask, compute_mode),
4512 remainder, 0, OPTAB_LIB_WIDEN);
4513 if (remainder)
4514 return gen_lowpart (mode, remainder);
4516 quotient = expand_shift
4517 (RSHIFT_EXPR, compute_mode, op0,
4518 pre_shift, tquotient, 0);
4520 else
4522 rtx t1, t2, t3, t4;
4524 mh = choose_multiplier (d, size, size - 1,
4525 &ml, &post_shift, &lgup);
4526 gcc_assert (!mh);
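/* Editorial note (not part of expmed.c): the xor-based sequence below
   relies on the identity, for d > 0 and x < 0,
     floor (x / d) == ~((~x) / d)
   With t1 = x >> (size - 1) (all ones for negative x, zero otherwise),
   x ^ t1 is ~x when x is negative and x itself otherwise, so an unsigned
   multiply-high and shift applied to x ^ t1, xor'ed with t1 again, gives
   the floor quotient for either sign without a branch.  */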
4528 if (post_shift < BITS_PER_WORD
4529 && size - 1 < BITS_PER_WORD)
4531 t1 = expand_shift
4532 (RSHIFT_EXPR, compute_mode, op0,
4533 size - 1, NULL_RTX, 0);
4534 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4535 NULL_RTX, 0, OPTAB_WIDEN);
4536 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4537 + shift_cost (speed, compute_mode, size - 1)
4538 + 2 * add_cost (speed, compute_mode));
4539 t3 = expmed_mult_highpart
4540 (compute_mode, t2, gen_int_mode (ml, compute_mode),
4541 NULL_RTX, 1, max_cost - extra_cost);
4542 if (t3 != 0)
4544 t4 = expand_shift
4545 (RSHIFT_EXPR, compute_mode, t3,
4546 post_shift, NULL_RTX, 1);
4547 quotient = expand_binop (compute_mode, xor_optab,
4548 t4, t1, tquotient, 0,
4549 OPTAB_WIDEN);
4554 else
4556 rtx nsign, t1, t2, t3, t4;
4557 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4558 op0, constm1_rtx), NULL_RTX);
4559 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4560 0, OPTAB_WIDEN);
4561 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
4562 size - 1, NULL_RTX, 0);
4563 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4564 NULL_RTX);
4565 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4566 NULL_RTX, 0);
4567 if (t4)
4569 rtx t5;
4570 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4571 NULL_RTX, 0);
4572 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4573 t4, t5),
4574 tquotient);
4579 if (quotient != 0)
4580 break;
4581 delete_insns_since (last);
4583 /* Try using an instruction that produces both the quotient and
4584 remainder, using truncation. We can easily compensate the quotient
4585 or remainder to get floor rounding, once we have the remainder.
4586 Notice that we also compute the final remainder value here,
4587 and return the result right away. */
4588 if (target == 0 || GET_MODE (target) != compute_mode)
4589 target = gen_reg_rtx (compute_mode);
4591 if (rem_flag)
4593 remainder
4594 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4595 quotient = gen_reg_rtx (compute_mode);
4597 else
4599 quotient
4600 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4601 remainder = gen_reg_rtx (compute_mode);
4604 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4605 quotient, remainder, 0))
4607 /* This could be computed with a branch-less sequence.
4608 Save that for later. */
4609 rtx tem;
4610 rtx_code_label *label = gen_label_rtx ();
4611 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4612 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4613 NULL_RTX, 0, OPTAB_WIDEN);
4614 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4615 expand_dec (quotient, const1_rtx);
4616 expand_inc (remainder, op1);
4617 emit_label (label);
4618 return gen_lowpart (mode, rem_flag ? remainder : quotient);
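/* Editorial note (not part of expmed.c): the adjustment above turns a
   truncating quotient/remainder pair into the floor-rounded pair.  When
   the remainder is nonzero and OP0 and OP1 have opposite signs (their XOR
   is negative), the floor quotient is one less and the remainder larger by
   OP1: e.g. -7 / 3 truncates to q == -2, r == -1, while floor rounding
   needs q == -3, r == 2 == -1 + 3.  */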
4621 /* No luck with division elimination or divmod. Have to do it
4622 by conditionally adjusting op0 *and* the result. */
4624 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4625 rtx adjusted_op0;
4626 rtx tem;
4628 quotient = gen_reg_rtx (compute_mode);
4629 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4630 label1 = gen_label_rtx ();
4631 label2 = gen_label_rtx ();
4632 label3 = gen_label_rtx ();
4633 label4 = gen_label_rtx ();
4634 label5 = gen_label_rtx ();
4635 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4636 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4637 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4638 quotient, 0, OPTAB_LIB_WIDEN);
4639 if (tem != quotient)
4640 emit_move_insn (quotient, tem);
4641 emit_jump_insn (targetm.gen_jump (label5));
4642 emit_barrier ();
4643 emit_label (label1);
4644 expand_inc (adjusted_op0, const1_rtx);
4645 emit_jump_insn (targetm.gen_jump (label4));
4646 emit_barrier ();
4647 emit_label (label2);
4648 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4649 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4650 quotient, 0, OPTAB_LIB_WIDEN);
4651 if (tem != quotient)
4652 emit_move_insn (quotient, tem);
4653 emit_jump_insn (targetm.gen_jump (label5));
4654 emit_barrier ();
4655 emit_label (label3);
4656 expand_dec (adjusted_op0, const1_rtx);
4657 emit_label (label4);
4658 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4659 quotient, 0, OPTAB_LIB_WIDEN);
4660 if (tem != quotient)
4661 emit_move_insn (quotient, tem);
4662 expand_dec (quotient, const1_rtx);
4663 emit_label (label5);
4665 break;
4667 case CEIL_DIV_EXPR:
4668 case CEIL_MOD_EXPR:
4669 if (unsignedp)
4671 if (op1_is_constant
4672 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4673 && (size <= HOST_BITS_PER_WIDE_INT
4674 || INTVAL (op1) >= 0))
4676 rtx t1, t2, t3;
4677 unsigned HOST_WIDE_INT d = INTVAL (op1);
4678 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4679 floor_log2 (d), tquotient, 1);
4680 t2 = expand_binop (compute_mode, and_optab, op0,
4681 gen_int_mode (d - 1, compute_mode),
4682 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4683 t3 = gen_reg_rtx (compute_mode);
4684 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4685 compute_mode, 1, 1);
4686 if (t3 == 0)
4688 rtx_code_label *lab;
4689 lab = gen_label_rtx ();
4690 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4691 expand_inc (t1, const1_rtx);
4692 emit_label (lab);
4693 quotient = t1;
4695 else
4696 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4697 t1, t3),
4698 tquotient);
4699 break;
4702 /* Try using an instruction that produces both the quotient and
4703 remainder, using truncation. We can easily compensate the
4704 quotient or remainder to get ceiling rounding, once we have the
4705 remainder. Notice that we also compute the final remainder
4706 value here, and return the result right away. */
4707 if (target == 0 || GET_MODE (target) != compute_mode)
4708 target = gen_reg_rtx (compute_mode);
4710 if (rem_flag)
4712 remainder = (REG_P (target)
4713 ? target : gen_reg_rtx (compute_mode));
4714 quotient = gen_reg_rtx (compute_mode);
4716 else
4718 quotient = (REG_P (target)
4719 ? target : gen_reg_rtx (compute_mode));
4720 remainder = gen_reg_rtx (compute_mode);
4723 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4724 remainder, 1))
4726 /* This could be computed with a branch-less sequence.
4727 Save that for later. */
4728 rtx_code_label *label = gen_label_rtx ();
4729 do_cmp_and_jump (remainder, const0_rtx, EQ,
4730 compute_mode, label);
4731 expand_inc (quotient, const1_rtx);
4732 expand_dec (remainder, op1);
4733 emit_label (label);
4734 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4737 /* No luck with division elimination or divmod. Have to do it
4738 by conditionally adjusting op0 *and* the result. */
4740 rtx_code_label *label1, *label2;
4741 rtx adjusted_op0, tem;
4743 quotient = gen_reg_rtx (compute_mode);
4744 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4745 label1 = gen_label_rtx ();
4746 label2 = gen_label_rtx ();
4747 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4748 compute_mode, label1);
4749 emit_move_insn (quotient, const0_rtx);
4750 emit_jump_insn (targetm.gen_jump (label2));
4751 emit_barrier ();
4752 emit_label (label1);
4753 expand_dec (adjusted_op0, const1_rtx);
4754 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4755 quotient, 1, OPTAB_LIB_WIDEN);
4756 if (tem != quotient)
4757 emit_move_insn (quotient, tem);
4758 expand_inc (quotient, const1_rtx);
4759 emit_label (label2);
4762 else /* signed */
4764 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4765 && INTVAL (op1) >= 0)
4767 /* This is extremely similar to the code for the unsigned case
4768 above. For 2.7 we should merge these variants, but for
4769 2.6.1 I don't want to touch the code for unsigned since that
4770 gets used in C. The signed case will only be used by other
4771 languages (Ada). */
4773 rtx t1, t2, t3;
4774 unsigned HOST_WIDE_INT d = INTVAL (op1);
4775 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4776 floor_log2 (d), tquotient, 0);
4777 t2 = expand_binop (compute_mode, and_optab, op0,
4778 gen_int_mode (d - 1, compute_mode),
4779 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4780 t3 = gen_reg_rtx (compute_mode);
4781 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4782 compute_mode, 1, 1);
4783 if (t3 == 0)
4785 rtx_code_label *lab;
4786 lab = gen_label_rtx ();
4787 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4788 expand_inc (t1, const1_rtx);
4789 emit_label (lab);
4790 quotient = t1;
4792 else
4793 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4794 t1, t3),
4795 tquotient);
4796 break;
4799 /* Try using an instruction that produces both the quotient and
4800 remainder, using truncation. We can easily compensate the
4801 quotient or remainder to get ceiling rounding, once we have the
4802 remainder. Notice that we also compute the final remainder
4803 value here, and return the result right away. */
4804 if (target == 0 || GET_MODE (target) != compute_mode)
4805 target = gen_reg_rtx (compute_mode);
4806 if (rem_flag)
4808 remainder = (REG_P (target)
4809 ? target : gen_reg_rtx (compute_mode));
4810 quotient = gen_reg_rtx (compute_mode);
4812 else
4814 quotient = (REG_P (target)
4815 ? target : gen_reg_rtx (compute_mode));
4816 remainder = gen_reg_rtx (compute_mode);
4819 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4820 remainder, 0))
4822 /* This could be computed with a branch-less sequence.
4823 Save that for later. */
4824 rtx tem;
4825 rtx_code_label *label = gen_label_rtx ();
4826 do_cmp_and_jump (remainder, const0_rtx, EQ,
4827 compute_mode, label);
4828 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4829 NULL_RTX, 0, OPTAB_WIDEN);
4830 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4831 expand_inc (quotient, const1_rtx);
4832 expand_dec (remainder, op1);
4833 emit_label (label);
4834 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4837 /* No luck with division elimination or divmod. Have to do it
4838 by conditionally adjusting op0 *and* the result. */
4840 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4841 rtx adjusted_op0;
4842 rtx tem;
4844 quotient = gen_reg_rtx (compute_mode);
4845 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4846 label1 = gen_label_rtx ();
4847 label2 = gen_label_rtx ();
4848 label3 = gen_label_rtx ();
4849 label4 = gen_label_rtx ();
4850 label5 = gen_label_rtx ();
4851 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4852 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4853 compute_mode, label1);
4854 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4855 quotient, 0, OPTAB_LIB_WIDEN);
4856 if (tem != quotient)
4857 emit_move_insn (quotient, tem);
4858 emit_jump_insn (targetm.gen_jump (label5));
4859 emit_barrier ();
4860 emit_label (label1);
4861 expand_dec (adjusted_op0, const1_rtx);
4862 emit_jump_insn (targetm.gen_jump (label4));
4863 emit_barrier ();
4864 emit_label (label2);
4865 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4866 compute_mode, label3);
4867 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4868 quotient, 0, OPTAB_LIB_WIDEN);
4869 if (tem != quotient)
4870 emit_move_insn (quotient, tem);
4871 emit_jump_insn (targetm.gen_jump (label5));
4872 emit_barrier ();
4873 emit_label (label3);
4874 expand_inc (adjusted_op0, const1_rtx);
4875 emit_label (label4);
4876 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4877 quotient, 0, OPTAB_LIB_WIDEN);
4878 if (tem != quotient)
4879 emit_move_insn (quotient, tem);
4880 expand_inc (quotient, const1_rtx);
4881 emit_label (label5);
4884 break;
4886 case EXACT_DIV_EXPR:
4887 if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4889 HOST_WIDE_INT d = INTVAL (op1);
4890 unsigned HOST_WIDE_INT ml;
4891 int pre_shift;
4892 rtx t1;
4894 pre_shift = ctz_or_zero (d);
4895 ml = invert_mod2n (d >> pre_shift, size);
4896 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4897 pre_shift, NULL_RTX, unsignedp);
4898 quotient = expand_mult (compute_mode, t1,
4899 gen_int_mode (ml, compute_mode),
4900 NULL_RTX, 1);
4902 insn = get_last_insn ();
4903 set_dst_reg_note (insn, REG_EQUAL,
4904 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4905 compute_mode, op0, op1),
4906 quotient);
4908 break;
4910 case ROUND_DIV_EXPR:
4911 case ROUND_MOD_EXPR:
4912 if (unsignedp)
4914 rtx tem;
4915 rtx_code_label *label;
4916 label = gen_label_rtx ();
4917 quotient = gen_reg_rtx (compute_mode);
4918 remainder = gen_reg_rtx (compute_mode);
4919 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4921 rtx tem;
4922 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4923 quotient, 1, OPTAB_LIB_WIDEN);
4924 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4925 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4926 remainder, 1, OPTAB_LIB_WIDEN);
4928 tem = plus_constant (compute_mode, op1, -1);
4929 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4930 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4931 expand_inc (quotient, const1_rtx);
4932 expand_dec (remainder, op1);
4933 emit_label (label);
4935 else
4937 rtx abs_rem, abs_op1, tem, mask;
4938 rtx_code_label *label;
4939 label = gen_label_rtx ();
4940 quotient = gen_reg_rtx (compute_mode);
4941 remainder = gen_reg_rtx (compute_mode);
4942 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4944 rtx tem;
4945 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4946 quotient, 0, OPTAB_LIB_WIDEN);
4947 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4948 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4949 remainder, 0, OPTAB_LIB_WIDEN);
4951 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4952 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4953 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4954 1, NULL_RTX, 1);
4955 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4956 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4957 NULL_RTX, 0, OPTAB_WIDEN);
4958 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4959 size - 1, NULL_RTX, 0);
4960 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4961 NULL_RTX, 0, OPTAB_WIDEN);
4962 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4963 NULL_RTX, 0, OPTAB_WIDEN);
4964 expand_inc (quotient, tem);
4965 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4966 NULL_RTX, 0, OPTAB_WIDEN);
4967 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4968 NULL_RTX, 0, OPTAB_WIDEN);
4969 expand_dec (remainder, tem);
4970 emit_label (label);
4972 return gen_lowpart (mode, rem_flag ? remainder : quotient);
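/* Editorial note (not part of expmed.c): both branches above turn a
   truncating division into round-to-nearest with ties away from zero.
   The unsigned branch bumps the quotient when remainder > (op1 - 1) / 2,
   which is equivalent to computing (x + op1 / 2) / op1; the signed branch
   compares 2 * |remainder| with |op1| and uses the sign of op0 ^ op1 to
   add or subtract 1 (and adjust the remainder by op1) without a branch.  */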
4974 default:
4975 gcc_unreachable ();
4978 if (quotient == 0)
4980 if (target && GET_MODE (target) != compute_mode)
4981 target = 0;
4983 if (rem_flag)
4985 /* Try to produce the remainder without producing the quotient.
4986 If we seem to have a divmod pattern that does not require widening,
4987 don't try widening here. We should really have a WIDEN argument
4988 to expand_twoval_binop, since what we'd really like to do here is
4989 1) try a mod insn in compute_mode
4990 2) try a divmod insn in compute_mode
4991 3) try a div insn in compute_mode and multiply-subtract to get
4992 remainder
4993 4) try the same things with widening allowed. */
4994 remainder
4995 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4996 op0, op1, target,
4997 unsignedp,
4998 ((optab_handler (optab2, compute_mode)
4999 != CODE_FOR_nothing)
5000 ? OPTAB_DIRECT : OPTAB_WIDEN));
5001 if (remainder == 0)
5003 /* No luck there. Can we do remainder and divide at once
5004 without a library call? */
5005 remainder = gen_reg_rtx (compute_mode);
5006 if (! expand_twoval_binop ((unsignedp
5007 ? udivmod_optab
5008 : sdivmod_optab),
5009 op0, op1,
5010 NULL_RTX, remainder, unsignedp))
5011 remainder = 0;
5014 if (remainder)
5015 return gen_lowpart (mode, remainder);
5018 /* Produce the quotient. Try a quotient insn, but not a library call.
5019 If we have a divmod in this mode, use it in preference to widening
5020 the div (for this test we assume it will not fail). Note that optab2
5021 is set to the one of the two optabs that the call below will use. */
5022 quotient
5023 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5024 op0, op1, rem_flag ? NULL_RTX : target,
5025 unsignedp,
5026 ((optab_handler (optab2, compute_mode)
5027 != CODE_FOR_nothing)
5028 ? OPTAB_DIRECT : OPTAB_WIDEN));
5030 if (quotient == 0)
5032 /* No luck there. Try a quotient-and-remainder insn,
5033 keeping the quotient alone. */
5034 quotient = gen_reg_rtx (compute_mode);
5035 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5036 op0, op1,
5037 quotient, NULL_RTX, unsignedp))
5039 quotient = 0;
5040 if (! rem_flag)
5041 /* Still no luck. If we are not computing the remainder,
5042 use a library call for the quotient. */
5043 quotient = sign_expand_binop (compute_mode,
5044 udiv_optab, sdiv_optab,
5045 op0, op1, target,
5046 unsignedp, OPTAB_LIB_WIDEN);
5051 if (rem_flag)
5053 if (target && GET_MODE (target) != compute_mode)
5054 target = 0;
5056 if (quotient == 0)
5058 /* No divide instruction either. Use library for remainder. */
5059 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5060 op0, op1, target,
5061 unsignedp, OPTAB_LIB_WIDEN);
5062 /* No remainder function. Try a quotient-and-remainder
5063 function, keeping the remainder. */
5064 if (!remainder)
5066 remainder = gen_reg_rtx (compute_mode);
5067 if (!expand_twoval_binop_libfunc
5068 (unsignedp ? udivmod_optab : sdivmod_optab,
5069 op0, op1,
5070 NULL_RTX, remainder,
5071 unsignedp ? UMOD : MOD))
5072 remainder = NULL_RTX;
5075 else
5077 /* We divided. Now finish doing X - Y * (X / Y). */
5078 remainder = expand_mult (compute_mode, quotient, op1,
5079 NULL_RTX, unsignedp);
5080 remainder = expand_binop (compute_mode, sub_optab, op0,
5081 remainder, target, unsignedp,
5082 OPTAB_LIB_WIDEN);
5086 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5089 /* Return a tree node with data type TYPE, describing the value of X.
5090 Usually this is a VAR_DECL, if there is no obvious better choice.
5091 X may be an expression; however, we only support those expressions
5092 generated by loop.c. */
5094 tree
5095 make_tree (tree type, rtx x)
5097 tree t;
5099 switch (GET_CODE (x))
5101 case CONST_INT:
5102 case CONST_WIDE_INT:
5103 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5104 return t;
5106 case CONST_DOUBLE:
5107 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5108 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5109 t = wide_int_to_tree (type,
5110 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5111 HOST_BITS_PER_WIDE_INT * 2));
5112 else
5113 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5115 return t;
5117 case CONST_VECTOR:
5119 int units = CONST_VECTOR_NUNITS (x);
5120 tree itype = TREE_TYPE (type);
5121 tree *elts;
5122 int i;
5124 /* Build a tree with vector elements. */
5125 elts = XALLOCAVEC (tree, units);
5126 for (i = units - 1; i >= 0; --i)
5128 rtx elt = CONST_VECTOR_ELT (x, i);
5129 elts[i] = make_tree (itype, elt);
5132 return build_vector (type, elts);
5135 case PLUS:
5136 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5137 make_tree (type, XEXP (x, 1)));
5139 case MINUS:
5140 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5141 make_tree (type, XEXP (x, 1)));
5143 case NEG:
5144 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5146 case MULT:
5147 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5148 make_tree (type, XEXP (x, 1)));
5150 case ASHIFT:
5151 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5152 make_tree (type, XEXP (x, 1)));
5154 case LSHIFTRT:
5155 t = unsigned_type_for (type);
5156 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5157 make_tree (t, XEXP (x, 0)),
5158 make_tree (type, XEXP (x, 1))));
5160 case ASHIFTRT:
5161 t = signed_type_for (type);
5162 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5163 make_tree (t, XEXP (x, 0)),
5164 make_tree (type, XEXP (x, 1))));
5166 case DIV:
5167 if (TREE_CODE (type) != REAL_TYPE)
5168 t = signed_type_for (type);
5169 else
5170 t = type;
5172 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5173 make_tree (t, XEXP (x, 0)),
5174 make_tree (t, XEXP (x, 1))));
5175 case UDIV:
5176 t = unsigned_type_for (type);
5177 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5178 make_tree (t, XEXP (x, 0)),
5179 make_tree (t, XEXP (x, 1))));
5181 case SIGN_EXTEND:
5182 case ZERO_EXTEND:
5183 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5184 GET_CODE (x) == ZERO_EXTEND);
5185 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5187 case CONST:
5188 return make_tree (type, XEXP (x, 0));
5190 case SYMBOL_REF:
5191 t = SYMBOL_REF_DECL (x);
5192 if (t)
5193 return fold_convert (type, build_fold_addr_expr (t));
5194 /* fall through. */
5196 default:
5197 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5199 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5200 address mode to pointer mode. */
5201 if (POINTER_TYPE_P (type))
5202 x = convert_memory_address_addr_space
5203 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5205 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5206 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5207 t->decl_with_rtl.rtl = x;
5209 return t;
5213 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5214 and returning TARGET.
5216 If TARGET is 0, a pseudo-register or constant is returned. */
5219 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5221 rtx tem = 0;
5223 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5224 tem = simplify_binary_operation (AND, mode, op0, op1);
5225 if (tem == 0)
5226 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5228 if (target == 0)
5229 target = tem;
5230 else if (tem != target)
5231 emit_move_insn (target, tem);
5232 return target;
5235 /* Helper function for emit_store_flag. */
5237 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5238 machine_mode mode, machine_mode compare_mode,
5239 int unsignedp, rtx x, rtx y, int normalizep,
5240 machine_mode target_mode)
5242 struct expand_operand ops[4];
5243 rtx op0, comparison, subtarget;
5244 rtx_insn *last;
5245 machine_mode result_mode = targetm.cstore_mode (icode);
5247 last = get_last_insn ();
5248 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5249 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5250 if (!x || !y)
5252 delete_insns_since (last);
5253 return NULL_RTX;
5256 if (target_mode == VOIDmode)
5257 target_mode = result_mode;
5258 if (!target)
5259 target = gen_reg_rtx (target_mode);
5261 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5263 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5264 create_fixed_operand (&ops[1], comparison);
5265 create_fixed_operand (&ops[2], x);
5266 create_fixed_operand (&ops[3], y);
5267 if (!maybe_expand_insn (icode, 4, ops))
5269 delete_insns_since (last);
5270 return NULL_RTX;
5272 subtarget = ops[0].value;
5274 /* If we are converting to a wider mode, first convert to
5275 TARGET_MODE, then normalize. This produces better combining
5276 opportunities on machines that have a SIGN_EXTRACT when we are
5277 testing a single bit. This mostly benefits the 68k.
5279 If STORE_FLAG_VALUE does not have the sign bit set when
5280 interpreted in MODE, we can do this conversion as unsigned, which
5281 is usually more efficient. */
5282 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5284 convert_move (target, subtarget,
5285 val_signbit_known_clear_p (result_mode,
5286 STORE_FLAG_VALUE));
5287 op0 = target;
5288 result_mode = target_mode;
5290 else
5291 op0 = subtarget;
5293 /* If we want to keep subexpressions around, don't reuse our last
5294 target. */
5295 if (optimize)
5296 subtarget = 0;
5298 /* Now normalize to the proper value in MODE. Sometimes we don't
5299 have to do anything. */
5300 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5302 /* STORE_FLAG_VALUE might be the most negative number, so write
5303 the comparison this way to avoid a compile-time warning. */
5304 else if (- normalizep == STORE_FLAG_VALUE)
5305 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5307 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5308 it hard to use a value of just the sign bit due to ANSI integer
5309 constant typing rules. */
5310 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5311 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5312 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5313 normalizep == 1);
5314 else
5316 gcc_assert (STORE_FLAG_VALUE & 1);
5318 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5319 if (normalizep == -1)
5320 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
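/* Two worked cases of the normalization above (the values are illustrative):
   with STORE_FLAG_VALUE == -1 and NORMALIZEP == 1, -NORMALIZEP equals
   STORE_FLAG_VALUE, so a single negation maps -1/0 to 1/0; with a
   STORE_FLAG_VALUE of just the sign bit (e.g. 0x80000000 in SImode) and
   NORMALIZEP == 1, a logical right shift by 31 maps it to 1/0, and with
   NORMALIZEP == -1 the shift is arithmetic and yields -1/0 instead.  */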
5323 /* If we were converting to a smaller mode, do the conversion now. */
5324 if (target_mode != result_mode)
5326 convert_move (target, op0, 0);
5327 return target;
5329 else
5330 return op0;
5334 /* A subroutine of emit_store_flag only including "tricks" that do not
5335 need a recursive call. These are kept separate to avoid infinite
5336 loops. */
5338 static rtx
5339 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5340 machine_mode mode, int unsignedp, int normalizep,
5341 machine_mode target_mode)
5343 rtx subtarget;
5344 enum insn_code icode;
5345 machine_mode compare_mode;
5346 enum mode_class mclass;
5347 enum rtx_code scode;
5349 if (unsignedp)
5350 code = unsigned_condition (code);
5351 scode = swap_condition (code);
5353 /* If one operand is constant, make it the second one. Only do this
5354 if the other operand is not constant as well. */
5356 if (swap_commutative_operands_p (op0, op1))
5358 std::swap (op0, op1);
5359 code = swap_condition (code);
5362 if (mode == VOIDmode)
5363 mode = GET_MODE (op0);
5365 /* For some comparisons with 1 and -1, we can convert this to
5366 comparisons with zero. This will often produce more opportunities for
5367 store-flag insns. */
5369 switch (code)
5371 case LT:
5372 if (op1 == const1_rtx)
5373 op1 = const0_rtx, code = LE;
5374 break;
5375 case LE:
5376 if (op1 == constm1_rtx)
5377 op1 = const0_rtx, code = LT;
5378 break;
5379 case GE:
5380 if (op1 == const1_rtx)
5381 op1 = const0_rtx, code = GT;
5382 break;
5383 case GT:
5384 if (op1 == constm1_rtx)
5385 op1 = const0_rtx, code = GE;
5386 break;
5387 case GEU:
5388 if (op1 == const1_rtx)
5389 op1 = const0_rtx, code = NE;
5390 break;
5391 case LTU:
5392 if (op1 == const1_rtx)
5393 op1 = const0_rtx, code = EQ;
5394 break;
5395 default:
5396 break;
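/* Worked examples of the rewrites above (signed unless noted):

     x <  1    becomes  x <= 0
     x <= -1   becomes  x <  0
     x >= 1    becomes  x >  0
     x >  -1   becomes  x >= 0
     x >= 1  (unsigned, GEU)  becomes  x != 0
     x <  1  (unsigned, LTU)  becomes  x == 0  */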
5399 /* If we are comparing a double-word integer with zero or -1, we can
5400 convert the comparison into one involving a single word. */
5401 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5402 && GET_MODE_CLASS (mode) == MODE_INT
5403 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5405 rtx tem;
5406 if ((code == EQ || code == NE)
5407 && (op1 == const0_rtx || op1 == constm1_rtx))
5409 rtx op00, op01;
5411 /* Do a logical OR or AND of the two words and compare the
5412 result. */
5413 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5414 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5415 tem = expand_binop (word_mode,
5416 op1 == const0_rtx ? ior_optab : and_optab,
5417 op00, op01, NULL_RTX, unsignedp,
5418 OPTAB_DIRECT);
5420 if (tem != 0)
5421 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5422 unsignedp, normalizep);
5424 else if ((code == LT || code == GE) && op1 == const0_rtx)
5426 rtx op0h;
5428 /* If testing the sign bit, we can just test the high word. */
5429 op0h = simplify_gen_subreg (word_mode, op0, mode,
5430 subreg_highpart_offset (word_mode,
5431 mode));
5432 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5433 unsignedp, normalizep);
5435 else
5436 tem = NULL_RTX;
5438 if (tem)
5440 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5441 return tem;
5442 if (!target)
5443 target = gen_reg_rtx (target_mode);
5445 convert_move (target, tem,
5446 !val_signbit_known_set_p (word_mode,
5447 (normalizep ? normalizep
5448 : STORE_FLAG_VALUE)));
5449 return target;
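/* As a concrete sketch, for a double-word value X whose word halves are
   HI and LO on a 32-bit target:

     X ==  0   becomes  (HI | LO) ==  0
     X == -1   becomes  (HI & LO) == -1
     X <   0   becomes  HI <  0     (the sign bit lives in the high word)
     X >=  0   becomes  HI >= 0  */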
5453 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5454 complement of A (for GE) and shifting the sign bit to the low bit. */
5455 if (op1 == const0_rtx && (code == LT || code == GE)
5456 && GET_MODE_CLASS (mode) == MODE_INT
5457 && (normalizep || STORE_FLAG_VALUE == 1
5458 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5460 subtarget = target;
5462 if (!target)
5463 target_mode = mode;
5465 /* If the result is to be wider than OP0, it is best to convert it
5466 first. If it is to be narrower, it is *incorrect* to convert it
5467 first. */
5468 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5470 op0 = convert_modes (target_mode, mode, op0, 0);
5471 mode = target_mode;
5474 if (target_mode != mode)
5475 subtarget = 0;
5477 if (code == GE)
5478 op0 = expand_unop (mode, one_cmpl_optab, op0,
5479 ((STORE_FLAG_VALUE == 1 || normalizep)
5480 ? 0 : subtarget), 0);
5482 if (STORE_FLAG_VALUE == 1 || normalizep)
5483 /* If we are supposed to produce a 0/1 value, we want to do
5484 a logical shift from the sign bit to the low-order bit; for
5485 a -1/0 value, we do an arithmetic shift. */
5486 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5487 GET_MODE_BITSIZE (mode) - 1,
5488 subtarget, normalizep != -1);
5490 if (mode != target_mode)
5491 op0 = convert_modes (target_mode, mode, op0, 0);
5493 return op0;
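/* Worked example, assuming 32-bit two's-complement SImode and a 0/1
   result (NORMALIZEP == 1):

     A <  0:  (unsigned) A  >> 31   is 1 iff the sign bit of A is set
     A >= 0:  (unsigned) ~A >> 31   is 1 iff the sign bit of A is clear

   e.g. A = -7:  ~A = 6 and 6 >> 31 = 0;  A = 5:  ~A = -6 and the logical
   shift of -6 by 31 gives 1.  */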
5496 mclass = GET_MODE_CLASS (mode);
5497 for (compare_mode = mode; compare_mode != VOIDmode;
5498 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5500 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5501 icode = optab_handler (cstore_optab, optab_mode);
5502 if (icode != CODE_FOR_nothing)
5504 do_pending_stack_adjust ();
5505 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5506 unsignedp, op0, op1, normalizep, target_mode);
5507 if (tem)
5508 return tem;
5510 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5512 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5513 unsignedp, op1, op0, normalizep, target_mode);
5514 if (tem)
5515 return tem;
5517 break;
5521 return 0;
5524 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5525 and storing in TARGET. Normally return TARGET.
5526 Return 0 if that cannot be done.
5528 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5529 it is VOIDmode, they cannot both be CONST_INT.
5531 UNSIGNEDP is for the case where we have to widen the operands
5532 to perform the operation. It says to use zero-extension.
5534 NORMALIZEP is 1 if we should convert the result to be either zero
5535 or one.  NORMALIZEP is -1 if we should convert the result to be
5536 either zero or -1. If NORMALIZEP is zero, the result will be left
5537 "raw" out of the scc insn. */
5540 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5541 machine_mode mode, int unsignedp, int normalizep)
5543 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5544 enum rtx_code rcode;
5545 rtx subtarget;
5546 rtx tem, trueval;
5547 rtx_insn *last;
5549 /* If we compare constants, we shouldn't use a store-flag operation,
5550 but a constant load. We can get there via the vanilla route that
5551 usually generates a compare-branch sequence, but will in this case
5552 fold the comparison to a constant, and thus elide the branch. */
5553 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5554 return NULL_RTX;
5556 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5557 target_mode);
5558 if (tem)
5559 return tem;
5561 /* If we reached here, we can't do this with a scc insn, however there
5562 are some comparisons that can be done in other ways. Don't do any
5563 of these cases if branches are very cheap. */
5564 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5565 return 0;
5567 /* See what we need to return. We can only return a 1, -1, or the
5568 sign bit. */
5570 if (normalizep == 0)
5572 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5573 normalizep = STORE_FLAG_VALUE;
5575 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5577 else
5578 return 0;
5581 last = get_last_insn ();
5583 /* If optimizing, use different pseudo registers for each insn, instead
5584 of reusing the same pseudo. This leads to better CSE, but slows
5585 down the compiler, since there are more pseudos. */
5586 subtarget = (!optimize
5587 && (target_mode == mode)) ? target : NULL_RTX;
5588 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5590 /* For floating-point comparisons, try the reverse comparison or try
5591 changing the "orderedness" of the comparison. */
5592 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5594 enum rtx_code first_code;
5595 bool and_them;
5597 rcode = reverse_condition_maybe_unordered (code);
5598 if (can_compare_p (rcode, mode, ccp_store_flag)
5599 && (code == ORDERED || code == UNORDERED
5600 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5601 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5603 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5604 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5606 /* For the reverse comparison, use either an addition or a XOR. */
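/* Worked example: with STORE_FLAG_VALUE == 1 and NORMALIZEP == -1, the
   reverse comparison R normalized to 0/1 yields the desired 0/-1 result
   as R + (-1); dually, an R normalized to 0/NORMALIZEP yields it as
   R ^ NORMALIZEP.  The add or xor is only used when rtx_cost reports it
   as free.  */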
5607 if (want_add
5608 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5609 optimize_insn_for_speed_p ()) == 0)
5611 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5612 STORE_FLAG_VALUE, target_mode);
5613 if (tem)
5614 return expand_binop (target_mode, add_optab, tem,
5615 gen_int_mode (normalizep, target_mode),
5616 target, 0, OPTAB_WIDEN);
5618 else if (!want_add
5619 && rtx_cost (trueval, mode, XOR, 1,
5620 optimize_insn_for_speed_p ()) == 0)
5622 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5623 normalizep, target_mode);
5624 if (tem)
5625 return expand_binop (target_mode, xor_optab, tem, trueval,
5626 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5630 delete_insns_since (last);
5632 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5633 if (code == ORDERED || code == UNORDERED)
5634 return 0;
5636 and_them = split_comparison (code, mode, &first_code, &code);
5638 /* If there are no NaNs, the first comparison should always fall through.
5639 Effectively change the comparison to the other one. */
5640 if (!HONOR_NANS (mode))
5642 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5643 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5644 target_mode);
5647 if (!HAVE_conditional_move)
5648 return 0;
5650 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5651 conditional move. */
5652 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5653 normalizep, target_mode);
5654 if (tem == 0)
5655 return 0;
5657 if (and_them)
5658 tem = emit_conditional_move (target, code, op0, op1, mode,
5659 tem, const0_rtx, GET_MODE (tem), 0);
5660 else
5661 tem = emit_conditional_move (target, code, op0, op1, mode,
5662 trueval, tem, GET_MODE (tem), 0);
5664 if (tem == 0)
5665 delete_insns_since (last);
5666 return tem;
5669 /* The remaining tricks only apply to integer comparisons. */
5671 if (GET_MODE_CLASS (mode) != MODE_INT)
5672 return 0;
5674 /* If this is an equality comparison of integers, we can try to exclusive-or
5675 (or subtract) the two operands and use a recursive call to try the
5676 comparison with zero. Don't do any of these cases if branches are
5677 very cheap. */
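/* The underlying identity, as a one-line example:

     (a == b)  <==>  ((a ^ b) == 0)  <==>  ((a - b) == 0)

   so the recursive call only has to handle a comparison against zero.  */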
5679 if ((code == EQ || code == NE) && op1 != const0_rtx)
5681 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5682 OPTAB_WIDEN);
5684 if (tem == 0)
5685 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5686 OPTAB_WIDEN);
5687 if (tem != 0)
5688 tem = emit_store_flag (target, code, tem, const0_rtx,
5689 mode, unsignedp, normalizep);
5690 if (tem != 0)
5691 return tem;
5693 delete_insns_since (last);
5696 /* For integer comparisons, try the reverse comparison.  However, for
5697 small X, and if we would have to extend X anyway, implementing "X != 0"
5698 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5699 rcode = reverse_condition (code);
5700 if (can_compare_p (rcode, mode, ccp_store_flag)
5701 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5702 && code == NE
5703 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5704 && op1 == const0_rtx))
5706 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5707 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5709 /* Again, for the reverse comparison, use either an addition or a XOR. */
5710 if (want_add
5711 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5712 optimize_insn_for_speed_p ()) == 0)
5714 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5715 STORE_FLAG_VALUE, target_mode);
5716 if (tem != 0)
5717 tem = expand_binop (target_mode, add_optab, tem,
5718 gen_int_mode (normalizep, target_mode),
5719 target, 0, OPTAB_WIDEN);
5721 else if (!want_add
5722 && rtx_cost (trueval, mode, XOR, 1,
5723 optimize_insn_for_speed_p ()) == 0)
5725 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5726 normalizep, target_mode);
5727 if (tem != 0)
5728 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5729 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5732 if (tem != 0)
5733 return tem;
5734 delete_insns_since (last);
5737 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5738 the constant zero. Reject all other comparisons at this point. Only
5739 do LE and GT if branches are expensive since they are expensive on
5740 2-operand machines. */
5742 if (op1 != const0_rtx
5743 || (code != EQ && code != NE
5744 && (BRANCH_COST (optimize_insn_for_speed_p (),
5745 false) <= 1 || (code != LE && code != GT))))
5746 return 0;
5748 /* Try to put the result of the comparison in the sign bit. Assume we can't
5749 do the necessary operation below. */
5751 tem = 0;
5753 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5754 the sign bit set. */
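/* Worked example, assuming 32-bit two's-complement arithmetic:

     A =  5:   5 |  4  =  5    sign clear   (A >  0)
     A =  0:   0 | -1  = -1    sign set     (A <= 0)
     A = -3:  -3 | -4  = -3    sign set     (A <= 0)

   For A > 0 both A and A - 1 are non-negative, so the OR cannot set the
   sign bit; for A == 0, A - 1 wraps to -1 and sets it.  */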
5756 if (code == LE)
5758 /* This is destructive, so SUBTARGET can't be OP0. */
5759 if (rtx_equal_p (subtarget, op0))
5760 subtarget = 0;
5762 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5763 OPTAB_WIDEN);
5764 if (tem)
5765 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5766 OPTAB_WIDEN);
5769 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5770 number of bits in the mode of OP0, minus one. */
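/* Worked example with a 32-bit int, where the arithmetic shift by 31
   yields 0 for A >= 0 and -1 for A < 0:

     A =  5:   0 -  5   = -5        sign set     (A >  0)
     A =  0:   0 -  0   =  0        sign clear   (not A > 0)
     A = -7:  -1 - (-7) =  6        sign clear   (not A > 0)
     A = INT_MIN:  -1 - INT_MIN = INT_MAX        sign clear  */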
5772 if (code == GT)
5774 if (rtx_equal_p (subtarget, op0))
5775 subtarget = 0;
5777 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5778 GET_MODE_BITSIZE (mode) - 1,
5779 subtarget, 0);
5780 if (tem)
5781 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5782 OPTAB_WIDEN);
5785 if (code == EQ || code == NE)
5787 /* For EQ or NE, one way to do the comparison is to apply an operation
5788 that converts the operand into a positive number if it is nonzero
5789 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5790 for NE we negate. This puts the result in the sign bit. Then we
5791 normalize with a shift, if needed.
5793 Two operations that can do the above actions are ABS and FFS, so try
5794 them. If that doesn't work, and MODE is smaller than a full word,
5795 we can use zero-extension to the wider mode (an unsigned conversion)
5796 as the operation. */
5798 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5799 that is compensated by the subsequent overflow when subtracting
5800 one / negating. */
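/* Worked example using ABS on a 32-bit int:

     A =  5:  ABS = 5;  EQ:  5 - 1 =  4   sign clear -> A == 0 is false
                        NE:      -5       sign set   -> A != 0 is true
     A =  0:  ABS = 0;  EQ:  0 - 1 = -1   sign set   -> A == 0 is true
                        NE:       0       sign clear -> A != 0 is false
     A = INT_MIN:  ABS wraps to INT_MIN, but INT_MIN - 1 = INT_MAX and
       -INT_MIN = INT_MIN, so the sign bit still comes out right.  */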
5802 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5803 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5804 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5805 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5806 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5808 tem = convert_modes (word_mode, mode, op0, 1);
5809 mode = word_mode;
5812 if (tem != 0)
5814 if (code == EQ)
5815 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5816 0, OPTAB_WIDEN);
5817 else
5818 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5821 /* If we couldn't do it that way, for NE we can "or" the two's complement
5822 of the value with itself. For EQ, we take the one's complement of
5823 that "or", which is an extra insn, so we only handle EQ if branches
5824 are expensive. */
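/* Worked example, 32-bit two's complement:

     A =  0:   0 | -0 =  0        sign clear -> A != 0 is false
     A =  5:   5 | -5 = -1        sign set   -> A != 0 is true
     A = INT_MIN:  INT_MIN | INT_MIN = INT_MIN   sign set

   For EQ, the extra one's complement inverts the sign bit along with the
   rest of the word, at the cost of one more insn.  */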
5826 if (tem == 0
5827 && (code == NE
5828 || BRANCH_COST (optimize_insn_for_speed_p (),
5829 false) > 1))
5831 if (rtx_equal_p (subtarget, op0))
5832 subtarget = 0;
5834 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5835 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5836 OPTAB_WIDEN);
5838 if (tem && code == EQ)
5839 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5843 if (tem && normalizep)
5844 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5845 GET_MODE_BITSIZE (mode) - 1,
5846 subtarget, normalizep == 1);
5848 if (tem)
5850 if (!target)
5852 else if (GET_MODE (tem) != target_mode)
5854 convert_move (target, tem, 0);
5855 tem = target;
5857 else if (!subtarget)
5859 emit_move_insn (target, tem);
5860 tem = target;
5863 else
5864 delete_insns_since (last);
5866 return tem;
5869 /* Like emit_store_flag, but always succeeds. */
5872 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5873 machine_mode mode, int unsignedp, int normalizep)
5875 rtx tem;
5876 rtx_code_label *label;
5877 rtx trueval, falseval;
5879 /* First see if emit_store_flag can do the job. */
5880 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5881 if (tem != 0)
5882 return tem;
5884 if (!target)
5885 target = gen_reg_rtx (word_mode);
5887 /* If this failed, we have to do this with set/compare/jump/set code.
5888 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5889 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5890 if (code == NE
5891 && GET_MODE_CLASS (mode) == MODE_INT
5892 && REG_P (target)
5893 && op0 == target
5894 && op1 == const0_rtx)
5896 label = gen_label_rtx ();
5897 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5898 NULL_RTX, NULL, label, -1);
5899 emit_move_insn (target, trueval);
5900 emit_label (label);
5901 return target;
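/* The special case above emits, in pseudocode (TARGET already holds FOO
   and TRUEVAL is NORMALIZEP or 1):

       if (target == 0) goto label;
       target = trueval;
     label:

   so TARGET keeps its zero when FOO was zero and becomes TRUEVAL
   otherwise.  */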
5904 if (!REG_P (target)
5905 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5906 target = gen_reg_rtx (GET_MODE (target));
5908 /* Jump in the right direction if the target cannot implement CODE
5909 but can jump on its reverse condition. */
5910 falseval = const0_rtx;
5911 if (! can_compare_p (code, mode, ccp_jump)
5912 && (! FLOAT_MODE_P (mode)
5913 || code == ORDERED || code == UNORDERED
5914 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5915 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5917 enum rtx_code rcode;
5918 if (FLOAT_MODE_P (mode))
5919 rcode = reverse_condition_maybe_unordered (code);
5920 else
5921 rcode = reverse_condition (code);
5923 /* Canonicalize to UNORDERED for the libcall. */
5924 if (can_compare_p (rcode, mode, ccp_jump)
5925 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5927 falseval = trueval;
5928 trueval = const0_rtx;
5929 code = rcode;
5933 emit_move_insn (target, trueval);
5934 label = gen_label_rtx ();
5935 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5936 label, -1);
5938 emit_move_insn (target, falseval);
5939 emit_label (label);
5941 return target;
5944 /* Perform possibly multi-word comparison and conditional jump to LABEL
5945 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5946 now a thin wrapper around do_compare_rtx_and_jump. */
5948 static void
5949 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5950 rtx_code_label *label)
5952 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5953 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
5954 NULL, label, -1);