gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2016 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "backend.h"
  26 #include "target.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "predict.h"
  30 #include "memmodel.h"
  31 #include "tm_p.h"
  32 #include "expmed.h"
  33 #include "optabs.h"
  34 #include "emit-rtl.h"
  35 #include "diagnostic-core.h"
  36 #include "fold-const.h"
  37 #include "stor-layout.h"
  38 #include "dojump.h"
  39 #include "explow.h"
  40 #include "expr.h"
  41 #include "langhooks.h"
  42
  43 struct target_expmed default_target_expmed;
  44 #if SWITCHABLE_TARGET
  45 struct target_expmed *this_target_expmed = &default_target_expmed;
  46 #endif
  47
  48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  49                                    unsigned HOST_WIDE_INT,
  50                                    unsigned HOST_WIDE_INT,
  51                                    unsigned HOST_WIDE_INT,
  52                                    rtx, bool);
  53 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
  54                                      unsigned HOST_WIDE_INT,
  55                                      rtx, bool);
  56 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  57                                    unsigned HOST_WIDE_INT,
  58                                    unsigned HOST_WIDE_INT,
  59                                    unsigned HOST_WIDE_INT,
  60                                    rtx, bool);
  61 static rtx extract_fixed_bit_field (machine_mode, rtx,
  62                                     unsigned HOST_WIDE_INT,
  63                                     unsigned HOST_WIDE_INT, rtx, int, bool);
  64 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
  65                                       unsigned HOST_WIDE_INT,
  66                                       unsigned HOST_WIDE_INT, rtx, int, bool);
  67 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
  68 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  69                                     unsigned HOST_WIDE_INT, int, bool);
  70 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
  71 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  72 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  73
  74 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  75    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  76    The mask is truncated if necessary to the width of mode MODE.  The
  77    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
  78
  79 static inline rtx
  80 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
  81 {
  82   return immed_wide_int_const
  83     (wi::shifted_mask (bitpos, bitsize, complement,
  84                        GET_MODE_PRECISION (mode)), mode);
  85 }
  86
  87 /* Test whether a value is zero of a power of two.  */
  88 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  89   (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
  90
  91 struct init_expmed_rtl
  92 {
  93   rtx reg;
  94   rtx plus;
  95   rtx neg;
  96   rtx mult;
  97   rtx sdiv;
  98   rtx udiv;
  99   rtx sdiv_32;
 100   rtx smod_32;
 101   rtx wide_mult;
 102   rtx wide_lshr;
 103   rtx wide_trunc;
 104   rtx shift;
 105   rtx shift_mult;
 106   rtx shift_add;
 107   rtx shift_sub0;
 108   rtx shift_sub1;
 109   rtx zext;
 110   rtx trunc;
 111
 112   rtx pow2[MAX_BITS_PER_WORD];
 113   rtx cint[MAX_BITS_PER_WORD];
 114 };
 115
 116 static void
 117 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
 118                       machine_mode from_mode, bool speed)
 119 {
 120   int to_size, from_size;
 121   rtx which;
 122
 123   to_size = GET_MODE_PRECISION (to_mode);
 124   from_size = GET_MODE_PRECISION (from_mode);
 125
 126   /* Most partial integers have a precision less than the "full"
 127      integer it requires for storage.  In case one doesn't, for
 128      comparison purposes here, reduce the bit size by one in that
 129      case.  */
 130   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 131       && pow2p_hwi (to_size))
 132     to_size --;
 133   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 134       && pow2p_hwi (from_size))
 135     from_size --;
 136
 137   /* Assume cost of zero-extend and sign-extend is the same.  */
 138   which = (to_size < from_size ? all->trunc : all->zext);
 139
 140   PUT_MODE (all->reg, from_mode);
 141   set_convert_cost (to_mode, from_mode, speed,
 142                     set_src_cost (which, to_mode, speed));
 143 }
 144
 145 static void
 146 init_expmed_one_mode (struct init_expmed_rtl *all,
 147                       machine_mode mode, int speed)
 148 {
 149   int m, n, mode_bitsize;
 150   machine_mode mode_from;
 151
 152   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 153
 154   PUT_MODE (all->reg, mode);
 155   PUT_MODE (all->plus, mode);
 156   PUT_MODE (all->neg, mode);
 157   PUT_MODE (all->mult, mode);
 158   PUT_MODE (all->sdiv, mode);
 159   PUT_MODE (all->udiv, mode);
 160   PUT_MODE (all->sdiv_32, mode);
 161   PUT_MODE (all->smod_32, mode);
 162   PUT_MODE (all->wide_trunc, mode);
 163   PUT_MODE (all->shift, mode);
 164   PUT_MODE (all->shift_mult, mode);
 165   PUT_MODE (all->shift_add, mode);
 166   PUT_MODE (all->shift_sub0, mode);
 167   PUT_MODE (all->shift_sub1, mode);
 168   PUT_MODE (all->zext, mode);
 169   PUT_MODE (all->trunc, mode);
 170
 171   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
 172   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
 173   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
 174   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
 175   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
 176
 177   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
 178                                      <= 2 * add_cost (speed, mode)));
 179   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
 180                                      <= 4 * add_cost (speed, mode)));
 181
 182   set_shift_cost (speed, mode, 0, 0);
 183   {
 184     int cost = add_cost (speed, mode);
 185     set_shiftadd_cost (speed, mode, 0, cost);
 186     set_shiftsub0_cost (speed, mode, 0, cost);
 187     set_shiftsub1_cost (speed, mode, 0, cost);
 188   }
 189
 190   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 191   for (m = 1; m < n; m++)
 192     {
 193       XEXP (all->shift, 1) = all->cint[m];
 194       XEXP (all->shift_mult, 1) = all->pow2[m];
 195
 196       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
 197       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
 198                                                        speed));
 199       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
 200                                                         speed));
 201       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
 202                                                         speed));
 203     }
 204
 205   if (SCALAR_INT_MODE_P (mode))
 206     {
 207       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 208            mode_from = (machine_mode)(mode_from + 1))
 209         init_expmed_one_conv (all, mode, mode_from, speed);
 210     }
 211   if (GET_MODE_CLASS (mode) == MODE_INT)
 212     {
 213       machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
 214       if (wider_mode != VOIDmode)
 215         {
 216           PUT_MODE (all->zext, wider_mode);
 217           PUT_MODE (all->wide_mult, wider_mode);
 218           PUT_MODE (all->wide_lshr, wider_mode);
 219           XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
 220
 221           set_mul_widen_cost (speed, wider_mode,
 222                               set_src_cost (all->wide_mult, wider_mode, speed));
 223           set_mul_highpart_cost (speed, mode,
 224                                  set_src_cost (all->wide_trunc, mode, speed));
 225         }
 226     }
 227 }
 228
 229 void
 230 init_expmed (void)
 231 {
 232   struct init_expmed_rtl all;
 233   machine_mode mode = QImode;
 234   int m, speed;
 235
 236   memset (&all, 0, sizeof all);
 237   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 238     {
 239       all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
 240       all.cint[m] = GEN_INT (m);
 241     }
 242
 243   /* Avoid using hard regs in ways which may be unsupported.  */
 244   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 245   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 246   all.neg = gen_rtx_NEG (mode, all.reg);
 247   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 248   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 249   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 250   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 251   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 252   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 253   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 254   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 255   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 256   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 257   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 258   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 259   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 260   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 261   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 262
 263   for (speed = 0; speed < 2; speed++)
 264     {
 265       crtl->maybe_hot_insn_p = speed;
 266       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
 267
 268       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 269            mode = (machine_mode)(mode + 1))
 270         init_expmed_one_mode (&all, mode, speed);
 271
 272       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 273         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 274              mode = (machine_mode)(mode + 1))
 275           init_expmed_one_mode (&all, mode, speed);
 276
 277       if (MIN_MODE_VECTOR_INT != VOIDmode)
 278         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 279              mode = (machine_mode)(mode + 1))
 280           init_expmed_one_mode (&all, mode, speed);
 281     }
 282
 283   if (alg_hash_used_p ())
 284     {
 285       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 286       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 287     }
 288   else
 289     set_alg_hash_used_p (true);
 290   default_rtl_profile ();
 291
 292   ggc_free (all.trunc);
 293   ggc_free (all.shift_sub1);
 294   ggc_free (all.shift_sub0);
 295   ggc_free (all.shift_add);
 296   ggc_free (all.shift_mult);
 297   ggc_free (all.shift);
 298   ggc_free (all.wide_trunc);
 299   ggc_free (all.wide_lshr);
 300   ggc_free (all.wide_mult);
 301   ggc_free (all.zext);
 302   ggc_free (all.smod_32);
 303   ggc_free (all.sdiv_32);
 304   ggc_free (all.udiv);
 305   ggc_free (all.sdiv);
 306   ggc_free (all.mult);
 307   ggc_free (all.neg);
 308   ggc_free (all.plus);
 309   ggc_free (all.reg);
 310 }
 311
 312 /* Return an rtx representing minus the value of X.
 313    MODE is the intended mode of the result,
 314    useful if X is a CONST_INT.  */
 315
 316 rtx
 317 negate_rtx (machine_mode mode, rtx x)
 318 {
 319   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 320
 321   if (result == 0)
 322     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 323
 324   return result;
 325 }
 326
 327 /* Whether reverse storage order is supported on the target.  */
 328 static int reverse_storage_order_supported = -1;
 329
 330 /* Check whether reverse storage order is supported on the target.  */
 331
 332 static void
 333 check_reverse_storage_order_support (void)
 334 {
 335   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 336     {
 337       reverse_storage_order_supported = 0;
 338       sorry ("reverse scalar storage order");
 339     }
 340   else
 341     reverse_storage_order_supported = 1;
 342 }
 343
 344 /* Whether reverse FP storage order is supported on the target.  */
 345 static int reverse_float_storage_order_supported = -1;
 346
 347 /* Check whether reverse FP storage order is supported on the target.  */
 348
 349 static void
 350 check_reverse_float_storage_order_support (void)
 351 {
 352   if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 353     {
 354       reverse_float_storage_order_supported = 0;
 355       sorry ("reverse floating-point scalar storage order");
 356     }
 357   else
 358     reverse_float_storage_order_supported = 1;
 359 }
 360
 361 /* Return an rtx representing value of X with reverse storage order.
 362    MODE is the intended mode of the result,
 363    useful if X is a CONST_INT.  */
 364
 365 rtx
 366 flip_storage_order (enum machine_mode mode, rtx x)
 367 {
 368   enum machine_mode int_mode;
 369   rtx result;
 370
 371   if (mode == QImode)
 372     return x;
 373
 374   if (COMPLEX_MODE_P (mode))
 375     {
 376       rtx real = read_complex_part (x, false);
 377       rtx imag = read_complex_part (x, true);
 378
 379       real = flip_storage_order (GET_MODE_INNER (mode), real);
 380       imag = flip_storage_order (GET_MODE_INNER (mode), imag);
 381
 382       return gen_rtx_CONCAT (mode, real, imag);
 383     }
 384
 385   if (__builtin_expect (reverse_storage_order_supported < 0, 0))
 386     check_reverse_storage_order_support ();
 387
 388   if (SCALAR_INT_MODE_P (mode))
 389     int_mode = mode;
 390   else
 391     {
 392       if (FLOAT_MODE_P (mode)
 393           && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
 394         check_reverse_float_storage_order_support ();
 395
 396       int_mode = mode_for_size (GET_MODE_PRECISION (mode), MODE_INT, 0);
 397       if (int_mode == BLKmode)
 398         {
 399           sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
 400           return x;
 401         }
 402       x = gen_lowpart (int_mode, x);
 403     }
 404
 405   result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
 406   if (result == 0)
 407     result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
 408
 409   if (int_mode != mode)
 410     result = gen_lowpart (mode, result);
 411
 412   return result;
 413 }
 414
 415 /* Adjust bitfield memory MEM so that it points to the first unit of mode
 416    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
 417    If MODE is BLKmode, return a reference to every byte in the bitfield.
 418    Set *NEW_BITNUM to the bit position of the field within the new memory.  */
 419
 420 static rtx
 421 narrow_bit_field_mem (rtx mem, machine_mode mode,
 422                       unsigned HOST_WIDE_INT bitsize,
 423                       unsigned HOST_WIDE_INT bitnum,
 424                       unsigned HOST_WIDE_INT *new_bitnum)
 425 {
 426   if (mode == BLKmode)
 427     {
 428       *new_bitnum = bitnum % BITS_PER_UNIT;
 429       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 430       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 431                             / BITS_PER_UNIT);
 432       return adjust_bitfield_address_size (mem, mode, offset, size);
 433     }
 434   else
 435     {
 436       unsigned int unit = GET_MODE_BITSIZE (mode);
 437       *new_bitnum = bitnum % unit;
 438       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 439       return adjust_bitfield_address (mem, mode, offset);
 440     }
 441 }
 442
 443 /* The caller wants to perform insertion or extraction PATTERN on a
 444    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 445    BITREGION_START and BITREGION_END are as for store_bit_field
 446    and FIELDMODE is the natural mode of the field.
 447
 448    Search for a mode that is compatible with the memory access
 449    restrictions and (where applicable) with a register insertion or
 450    extraction.  Return the new memory on success, storing the adjusted
 451    bit position in *NEW_BITNUM.  Return null otherwise.  */
 452
 453 static rtx
 454 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 455                               rtx op0, HOST_WIDE_INT bitsize,
 456                               HOST_WIDE_INT bitnum,
 457                               unsigned HOST_WIDE_INT bitregion_start,
 458                               unsigned HOST_WIDE_INT bitregion_end,
 459                               machine_mode fieldmode,
 460                               unsigned HOST_WIDE_INT *new_bitnum)
 461 {
 462   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 463                                 bitregion_end, MEM_ALIGN (op0),
 464                                 MEM_VOLATILE_P (op0));
 465   machine_mode best_mode;
 466   if (iter.next_mode (&best_mode))
 467     {
 468       /* We can use a memory in BEST_MODE.  See whether this is true for
 469          any wider modes.  All other things being equal, we prefer to
 470          use the widest mode possible because it tends to expose more
 471          CSE opportunities.  */
 472       if (!iter.prefer_smaller_modes ())
 473         {
 474           /* Limit the search to the mode required by the corresponding
 475              register insertion or extraction instruction, if any.  */
 476           machine_mode limit_mode = word_mode;
 477           extraction_insn insn;
 478           if (get_best_reg_extraction_insn (&insn, pattern,
 479                                             GET_MODE_BITSIZE (best_mode),
 480                                             fieldmode))
 481             limit_mode = insn.field_mode;
 482
 483           machine_mode wider_mode;
 484           while (iter.next_mode (&wider_mode)
 485                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 486             best_mode = wider_mode;
 487         }
 488       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 489                                    new_bitnum);
 490     }
 491   return NULL_RTX;
 492 }
 493
 494 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 495    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 496    offset is then BITNUM / BITS_PER_UNIT.  */
 497
 498 static bool
 499 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
 500                      unsigned HOST_WIDE_INT bitsize,
 501                      machine_mode struct_mode)
 502 {
 503   if (BYTES_BIG_ENDIAN)
 504     return (bitnum % BITS_PER_UNIT == 0
 505             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
 506                 || (bitnum + bitsize) % BITS_PER_WORD == 0));
 507   else
 508     return bitnum % BITS_PER_WORD == 0;
 509 }
 510
 511 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 512    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 513    Return false if the access would touch memory outside the range
 514    BITREGION_START to BITREGION_END for conformance to the C++ memory
 515    model.  */
 516
 517 static bool
 518 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 519                             unsigned HOST_WIDE_INT bitnum,
 520                             machine_mode fieldmode,
 521                             unsigned HOST_WIDE_INT bitregion_start,
 522                             unsigned HOST_WIDE_INT bitregion_end)
 523 {
 524   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 525
 526   /* -fstrict-volatile-bitfields must be enabled and we must have a
 527      volatile MEM.  */
 528   if (!MEM_P (op0)
 529       || !MEM_VOLATILE_P (op0)
 530       || flag_strict_volatile_bitfields <= 0)
 531     return false;
 532
 533   /* Non-integral modes likely only happen with packed structures.
 534      Punt.  */
 535   if (!SCALAR_INT_MODE_P (fieldmode))
 536     return false;
 537
 538   /* The bit size must not be larger than the field mode, and
 539      the field mode must not be larger than a word.  */
 540   if (bitsize > modesize || modesize > BITS_PER_WORD)
 541     return false;
 542
 543   /* Check for cases of unaligned fields that must be split.  */
 544   if (bitnum % modesize + bitsize > modesize)
 545     return false;
 546
 547   /* The memory must be sufficiently aligned for a MODESIZE access.
 548      This condition guarantees, that the memory access will not
 549      touch anything after the end of the structure.  */
 550   if (MEM_ALIGN (op0) < modesize)
 551     return false;
 552
 553   /* Check for cases where the C++ memory model applies.  */
 554   if (bitregion_end != 0
 555       && (bitnum - bitnum % modesize < bitregion_start
 556           || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
 557     return false;
 558
 559   return true;
 560 }
 561
 562 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 563    bit number BITNUM can be treated as a simple value of mode MODE.  */
 564
 565 static bool
 566 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 567                        unsigned HOST_WIDE_INT bitnum, machine_mode mode)
 568 {
 569   return (MEM_P (op0)
 570           && bitnum % BITS_PER_UNIT == 0
 571           && bitsize == GET_MODE_BITSIZE (mode)
 572           && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
 573               || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
 574                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 575 }
 576 \f
 577 /* Try to use instruction INSV to store VALUE into a field of OP0.
 578    BITSIZE and BITNUM are as for store_bit_field.  */
 579
 580 static bool
 581 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 582                             unsigned HOST_WIDE_INT bitsize,
 583                             unsigned HOST_WIDE_INT bitnum,
 584                             rtx value)
 585 {
 586   struct expand_operand ops[4];
 587   rtx value1;
 588   rtx xop0 = op0;
 589   rtx_insn *last = get_last_insn ();
 590   bool copy_back = false;
 591
 592   machine_mode op_mode = insv->field_mode;
 593   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 594   if (bitsize == 0 || bitsize > unit)
 595     return false;
 596
 597   if (MEM_P (xop0))
 598     /* Get a reference to the first byte of the field.  */
 599     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 600                                  &bitnum);
 601   else
 602     {
 603       /* Convert from counting within OP0 to counting in OP_MODE.  */
 604       if (BYTES_BIG_ENDIAN)
 605         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 606
 607       /* If xop0 is a register, we need it in OP_MODE
 608          to make it acceptable to the format of insv.  */
 609       if (GET_CODE (xop0) == SUBREG)
 610         /* We can't just change the mode, because this might clobber op0,
 611            and we will need the original value of op0 if insv fails.  */
 612         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 613       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 614         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 615     }
 616
 617   /* If the destination is a paradoxical subreg such that we need a
 618      truncate to the inner mode, perform the insertion on a temporary and
 619      truncate the result to the original destination.  Note that we can't
 620      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 621      X) 0)) is (reg:N X).  */
 622   if (GET_CODE (xop0) == SUBREG
 623       && REG_P (SUBREG_REG (xop0))
 624       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 625                                          op_mode))
 626     {
 627       rtx tem = gen_reg_rtx (op_mode);
 628       emit_move_insn (tem, xop0);
 629       xop0 = tem;
 630       copy_back = true;
 631     }
 632
 633   /* There are similar overflow check at the start of store_bit_field_1,
 634      but that only check the situation where the field lies completely
 635      outside the register, while there do have situation where the field
 636      lies partialy in the register, we need to adjust bitsize for this
 637      partial overflow situation.  Without this fix, pr48335-2.c on big-endian
 638      will broken on those arch support bit insert instruction, like arm, aarch64
 639      etc.  */
 640   if (bitsize + bitnum > unit && bitnum < unit)
 641     {
 642       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
 643                "destination object, data truncated into %wu-bit",
 644                bitsize, unit - bitnum);
 645       bitsize = unit - bitnum;
 646     }
 647
 648   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 649      "backwards" from the size of the unit we are inserting into.
 650      Otherwise, we count bits from the most significant on a
 651      BYTES/BITS_BIG_ENDIAN machine.  */
 652
 653   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 654     bitnum = unit - bitsize - bitnum;
 655
 656   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 657   value1 = value;
 658   if (GET_MODE (value) != op_mode)
 659     {
 660       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 661         {
 662           rtx tmp;
 663           /* Optimization: Don't bother really extending VALUE
 664              if it has all the bits we will actually use.  However,
 665              if we must narrow it, be sure we do it correctly.  */
 666
 667           if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 668             {
 669               tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 670               if (! tmp)
 671                 tmp = simplify_gen_subreg (op_mode,
 672                                            force_reg (GET_MODE (value),
 673                                                       value1),
 674                                            GET_MODE (value), 0);
 675             }
 676           else
 677             {
 678               tmp = gen_lowpart_if_possible (op_mode, value1);
 679               if (! tmp)
 680                 tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value),
 681                                                        value1));
 682             }
 683           value1 = tmp;
 684         }
 685       else if (CONST_INT_P (value))
 686         value1 = gen_int_mode (INTVAL (value), op_mode);
 687       else
 688         /* Parse phase is supposed to make VALUE's data type
 689            match that of the component reference, which is a type
 690            at least as wide as the field; so VALUE should have
 691            a mode that corresponds to that type.  */
 692         gcc_assert (CONSTANT_P (value));
 693     }
 694
 695   create_fixed_operand (&ops[0], xop0);
 696   create_integer_operand (&ops[1], bitsize);
 697   create_integer_operand (&ops[2], bitnum);
 698   create_input_operand (&ops[3], value1, op_mode);
 699   if (maybe_expand_insn (insv->icode, 4, ops))
 700     {
 701       if (copy_back)
 702         convert_move (op0, xop0, true);
 703       return true;
 704     }
 705   delete_insns_since (last);
 706   return false;
 707 }
 708
 709 /* A subroutine of store_bit_field, with the same arguments.  Return true
 710    if the operation could be implemented.
 711
 712    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 713    no other way of implementing the operation.  If FALLBACK_P is false,
 714    return false instead.  */
 715
 716 static bool
 717 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 718                    unsigned HOST_WIDE_INT bitnum,
 719                    unsigned HOST_WIDE_INT bitregion_start,
 720                    unsigned HOST_WIDE_INT bitregion_end,
 721                    machine_mode fieldmode,
 722                    rtx value, bool reverse, bool fallback_p)
 723 {
 724   rtx op0 = str_rtx;
 725   rtx orig_value;
 726
 727   while (GET_CODE (op0) == SUBREG)
 728     {
 729       /* The following line once was done only if WORDS_BIG_ENDIAN,
 730          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 731          meaningful at a much higher level; when structures are copied
 732          between memory and regs, the higher-numbered regs
 733          always get higher addresses.  */
 734       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 735       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 736       int byte_offset = 0;
 737
 738       /* Paradoxical subregs need special handling on big-endian machines.  */
 739       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 740         {
 741           int difference = inner_mode_size - outer_mode_size;
 742
 743           if (WORDS_BIG_ENDIAN)
 744             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 745           if (BYTES_BIG_ENDIAN)
 746             byte_offset += difference % UNITS_PER_WORD;
 747         }
 748       else
 749         byte_offset = SUBREG_BYTE (op0);
 750
 751       bitnum += byte_offset * BITS_PER_UNIT;
 752       op0 = SUBREG_REG (op0);
 753     }
 754
 755   /* No action is needed if the target is a register and if the field
 756      lies completely outside that register.  This can occur if the source
 757      code contains an out-of-bounds access to a small array.  */
 758   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 759     return true;
 760
 761   /* Use vec_set patterns for inserting parts of vectors whenever
 762      available.  */
 763   if (VECTOR_MODE_P (GET_MODE (op0))
 764       && !MEM_P (op0)
 765       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 766       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 767       && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
 768       && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
 769     {
 770       struct expand_operand ops[3];
 771       machine_mode outermode = GET_MODE (op0);
 772       machine_mode innermode = GET_MODE_INNER (outermode);
 773       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 774       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 775
 776       create_fixed_operand (&ops[0], op0);
 777       create_input_operand (&ops[1], value, innermode);
 778       create_integer_operand (&ops[2], pos);
 779       if (maybe_expand_insn (icode, 3, ops))
 780         return true;
 781     }
 782
 783   /* If the target is a register, overwriting the entire object, or storing
 784      a full-word or multi-word field can be done with just a SUBREG.  */
 785   if (!MEM_P (op0)
 786       && bitsize == GET_MODE_BITSIZE (fieldmode)
 787       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 788           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 789     {
 790       /* Use the subreg machinery either to narrow OP0 to the required
 791          words or to cope with mode punning between equal-sized modes.
 792          In the latter case, use subreg on the rhs side, not lhs.  */
 793       rtx sub;
 794
 795       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 796         {
 797           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 798           if (sub)
 799             {
 800               if (reverse)
 801                 sub = flip_storage_order (GET_MODE (op0), sub);
 802               emit_move_insn (op0, sub);
 803               return true;
 804             }
 805         }
 806       else
 807         {
 808           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 809                                      bitnum / BITS_PER_UNIT);
 810           if (sub)
 811             {
 812               if (reverse)
 813                 value = flip_storage_order (fieldmode, value);
 814               emit_move_insn (sub, value);
 815               return true;
 816             }
 817         }
 818     }
 819
 820   /* If the target is memory, storing any naturally aligned field can be
 821      done with a simple store.  For targets that support fast unaligned
 822      memory, any naturally sized, unit aligned field can be done directly.  */
 823   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
 824     {
 825       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 826       if (reverse)
 827         value = flip_storage_order (fieldmode, value);
 828       emit_move_insn (op0, value);
 829       return true;
 830     }
 831
 832   /* Make sure we are playing with integral modes.  Pun with subregs
 833      if we aren't.  This must come after the entire register case above,
 834      since that case is valid for any mode.  The following cases are only
 835      valid for integral modes.  */
 836   {
 837     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 838     if (imode != GET_MODE (op0))
 839       {
 840         if (MEM_P (op0))
 841           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
 842         else
 843           {
 844             gcc_assert (imode != BLKmode);
 845             op0 = gen_lowpart (imode, op0);
 846           }
 847       }
 848   }
 849
 850   /* Storing an lsb-aligned field in a register
 851      can be done with a movstrict instruction.  */
 852
 853   if (!MEM_P (op0)
 854       && !reverse
 855       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 856       && bitsize == GET_MODE_BITSIZE (fieldmode)
 857       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 858     {
 859       struct expand_operand ops[2];
 860       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 861       rtx arg0 = op0;
 862       unsigned HOST_WIDE_INT subreg_off;
 863
 864       if (GET_CODE (arg0) == SUBREG)
 865         {
 866           /* Else we've got some float mode source being extracted into
 867              a different float mode destination -- this combination of
 868              subregs results in Severe Tire Damage.  */
 869           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 870                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 871                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 872           arg0 = SUBREG_REG (arg0);
 873         }
 874
 875       subreg_off = bitnum / BITS_PER_UNIT;
 876       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 877         {
 878           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 879
 880           create_fixed_operand (&ops[0], arg0);
 881           /* Shrink the source operand to FIELDMODE.  */
 882           create_convert_operand_to (&ops[1], value, fieldmode, false);
 883           if (maybe_expand_insn (icode, 2, ops))
 884             return true;
 885         }
 886     }
 887
 888   /* Handle fields bigger than a word.  */
 889
 890   if (bitsize > BITS_PER_WORD)
 891     {
 892       /* Here we transfer the words of the field
 893          in the order least significant first.
 894          This is because the most significant word is the one which may
 895          be less than full.
 896          However, only do that if the value is not BLKmode.  */
 897
 898       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 899       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 900       unsigned int i;
 901       rtx_insn *last;
 902
 903       /* This is the mode we must force value to, so that there will be enough
 904          subwords to extract.  Note that fieldmode will often (always?) be
 905          VOIDmode, because that is what store_field uses to indicate that this
 906          is a bit field, but passing VOIDmode to operand_subword_force
 907          is not allowed.  */
 908       fieldmode = GET_MODE (value);
 909       if (fieldmode == VOIDmode)
 910         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 911
 912       last = get_last_insn ();
 913       for (i = 0; i < nwords; i++)
 914         {
 915           /* If I is 0, use the low-order word in both field and target;
 916              if I is 1, use the next to lowest word; and so on.  */
 917           unsigned int wordnum = (backwards
 918                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 919                                   - i - 1
 920                                   : i);
 921           unsigned int bit_offset = (backwards ^ reverse
 922                                      ? MAX ((int) bitsize - ((int) i + 1)
 923                                             * BITS_PER_WORD,
 924                                             0)
 925                                      : (int) i * BITS_PER_WORD);
 926           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 927           unsigned HOST_WIDE_INT new_bitsize =
 928             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 929
 930           /* If the remaining chunk doesn't have full wordsize we have
 931              to make sure that for big-endian machines the higher order
 932              bits are used.  */
 933           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 934             value_word = simplify_expand_binop (word_mode, lshr_optab,
 935                                                 value_word,
 936                                                 GEN_INT (BITS_PER_WORD
 937                                                          - new_bitsize),
 938                                                 NULL_RTX, true,
 939                                                 OPTAB_LIB_WIDEN);
 940
 941           if (!store_bit_field_1 (op0, new_bitsize,
 942                                   bitnum + bit_offset,
 943                                   bitregion_start, bitregion_end,
 944                                   word_mode,
 945                                   value_word, reverse, fallback_p))
 946             {
 947               delete_insns_since (last);
 948               return false;
 949             }
 950         }
 951       return true;
 952     }
 953
 954   /* If VALUE has a floating-point or complex mode, access it as an
 955      integer of the corresponding size.  This can occur on a machine
 956      with 64 bit registers that uses SFmode for float.  It can also
 957      occur for unaligned float or complex fields.  */
 958   orig_value = value;
 959   if (GET_MODE (value) != VOIDmode
 960       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 961       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 962     {
 963       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 964       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 965     }
 966
 967   /* If OP0 is a multi-word register, narrow it to the affected word.
 968      If the region spans two words, defer to store_split_bit_field.  */
 969   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 970     {
 971       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
 972         {
 973           if (!fallback_p)
 974             return false;
 975
 976           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 977                                  bitregion_end, value, reverse);
 978           return true;
 979         }
 980       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 981                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 982       gcc_assert (op0);
 983       bitnum %= BITS_PER_WORD;
 984     }
 985
 986   /* From here on we can assume that the field to be stored in fits
 987      within a word.  If the destination is a register, it too fits
 988      in a word.  */
 989
 990   extraction_insn insv;
 991   if (!MEM_P (op0)
 992       && !reverse
 993       && get_best_reg_extraction_insn (&insv, EP_insv,
 994                                        GET_MODE_BITSIZE (GET_MODE (op0)),
 995                                        fieldmode)
 996       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 997     return true;
 998
 999   /* If OP0 is a memory, try copying it to a register and seeing if a
1000      cheap register alternative is available.  */
1001   if (MEM_P (op0) && !reverse)
1002     {
1003       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1004                                         fieldmode)
1005           && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1006         return true;
1007
1008       rtx_insn *last = get_last_insn ();
1009
1010       /* Try loading part of OP0 into a register, inserting the bitfield
1011          into that, and then copying the result back to OP0.  */
1012       unsigned HOST_WIDE_INT bitpos;
1013       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1014                                                bitregion_start, bitregion_end,
1015                                                fieldmode, &bitpos);
1016       if (xop0)
1017         {
1018           rtx tempreg = copy_to_reg (xop0);
1019           if (store_bit_field_1 (tempreg, bitsize, bitpos,
1020                                  bitregion_start, bitregion_end,
1021                                  fieldmode, orig_value, reverse, false))
1022             {
1023               emit_move_insn (xop0, tempreg);
1024               return true;
1025             }
1026           delete_insns_since (last);
1027         }
1028     }
1029
1030   if (!fallback_p)
1031     return false;
1032
1033   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
1034                          bitregion_end, value, reverse);
1035   return true;
1036 }
1037
1038 /* Generate code to store value from rtx VALUE
1039    into a bit-field within structure STR_RTX
1040    containing BITSIZE bits starting at bit BITNUM.
1041
1042    BITREGION_START is bitpos of the first bitfield in this region.
1043    BITREGION_END is the bitpos of the ending bitfield in this region.
1044    These two fields are 0, if the C++ memory model does not apply,
1045    or we are not interested in keeping track of bitfield regions.
1046
1047    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1048
1049    If REVERSE is true, the store is to be done in reverse order.  */
1050
1051 void
1052 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1053                  unsigned HOST_WIDE_INT bitnum,
1054                  unsigned HOST_WIDE_INT bitregion_start,
1055                  unsigned HOST_WIDE_INT bitregion_end,
1056                  machine_mode fieldmode,
1057                  rtx value, bool reverse)
1058 {
1059   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1060   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
1061                                   bitregion_start, bitregion_end))
1062     {
1063       /* Storing of a full word can be done with a simple store.
1064          We know here that the field can be accessed with one single
1065          instruction.  For targets that support unaligned memory,
1066          an unaligned access may be necessary.  */
1067       if (bitsize == GET_MODE_BITSIZE (fieldmode))
1068         {
1069           str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
1070                                              bitnum / BITS_PER_UNIT);
1071           if (reverse)
1072             value = flip_storage_order (fieldmode, value);
1073           gcc_assert (bitnum % BITS_PER_UNIT == 0);
1074           emit_move_insn (str_rtx, value);
1075         }
1076       else
1077         {
1078           rtx temp;
1079
1080           str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
1081                                           &bitnum);
1082           gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
1083           temp = copy_to_reg (str_rtx);
1084           if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1085                                   fieldmode, value, reverse, true))
1086             gcc_unreachable ();
1087
1088           emit_move_insn (str_rtx, temp);
1089         }
1090
1091       return;
1092     }
1093
1094   /* Under the C++0x memory model, we must not touch bits outside the
1095      bit region.  Adjust the address to start at the beginning of the
1096      bit region.  */
1097   if (MEM_P (str_rtx) && bitregion_start > 0)
1098     {
1099       machine_mode bestmode;
1100       HOST_WIDE_INT offset, size;
1101
1102       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1103
1104       offset = bitregion_start / BITS_PER_UNIT;
1105       bitnum -= bitregion_start;
1106       size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1107       bitregion_end -= bitregion_start;
1108       bitregion_start = 0;
1109       bestmode = get_best_mode (bitsize, bitnum,
1110                                 bitregion_start, bitregion_end,
1111                                 MEM_ALIGN (str_rtx), VOIDmode,
1112                                 MEM_VOLATILE_P (str_rtx));
1113       str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1114     }
1115
1116   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1117                           bitregion_start, bitregion_end,
1118                           fieldmode, value, reverse, true))
1119     gcc_unreachable ();
1120 }
1121 \f
1122 /* Use shifts and boolean operations to store VALUE into a bit field of
1123    width BITSIZE in OP0, starting at bit BITNUM.
1124
1125    If REVERSE is true, the store is to be done in reverse order.  */
1126
1127 static void
1128 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1129                        unsigned HOST_WIDE_INT bitnum,
1130                        unsigned HOST_WIDE_INT bitregion_start,
1131                        unsigned HOST_WIDE_INT bitregion_end,
1132                        rtx value, bool reverse)
1133 {
1134   /* There is a case not handled here:
1135      a structure with a known alignment of just a halfword
1136      and a field split across two aligned halfwords within the structure.
1137      Or likewise a structure with a known alignment of just a byte
1138      and a field split across two bytes.
1139      Such cases are not supposed to be able to occur.  */
1140
1141   if (MEM_P (op0))
1142     {
1143       machine_mode mode = GET_MODE (op0);
1144       if (GET_MODE_BITSIZE (mode) == 0
1145           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1146         mode = word_mode;
1147       mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1148                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1149
1150       if (mode == VOIDmode)
1151         {
1152           /* The only way this should occur is if the field spans word
1153              boundaries.  */
1154           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1155                                  bitregion_end, value, reverse);
1156           return;
1157         }
1158
1159       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1160     }
1161
1162   store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
1163 }
1164
1165 /* Helper function for store_fixed_bit_field, stores
1166    the bit field always using the MODE of OP0.  */
1167
1168 static void
1169 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1170                          unsigned HOST_WIDE_INT bitnum,
1171                          rtx value, bool reverse)
1172 {
1173   machine_mode mode;
1174   rtx temp;
1175   int all_zero = 0;
1176   int all_one = 0;
1177
1178   mode = GET_MODE (op0);
1179   gcc_assert (SCALAR_INT_MODE_P (mode));
1180
1181   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1182      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1183
1184   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1185     /* BITNUM is the distance between our msb
1186        and that of the containing datum.
1187        Convert it to the distance from the lsb.  */
1188     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1189
1190   /* Now BITNUM is always the distance between our lsb
1191      and that of OP0.  */
1192
1193   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1194      we must first convert its mode to MODE.  */
1195
1196   if (CONST_INT_P (value))
1197     {
1198       unsigned HOST_WIDE_INT v = UINTVAL (value);
1199
1200       if (bitsize < HOST_BITS_PER_WIDE_INT)
1201         v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1202
1203       if (v == 0)
1204         all_zero = 1;
1205       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1206                 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1207                || (bitsize == HOST_BITS_PER_WIDE_INT
1208                    && v == HOST_WIDE_INT_M1U))
1209         all_one = 1;
1210
1211       value = lshift_value (mode, v, bitnum);
1212     }
1213   else
1214     {
1215       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1216                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1217
1218       if (GET_MODE (value) != mode)
1219         value = convert_to_mode (mode, value, 1);
1220
1221       if (must_and)
1222         value = expand_binop (mode, and_optab, value,
1223                               mask_rtx (mode, 0, bitsize, 0),
1224                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1225       if (bitnum > 0)
1226         value = expand_shift (LSHIFT_EXPR, mode, value,
1227                               bitnum, NULL_RTX, 1);
1228     }
1229
1230   if (reverse)
1231     value = flip_storage_order (mode, value);
1232
1233   /* Now clear the chosen bits in OP0,
1234      except that if VALUE is -1 we need not bother.  */
1235   /* We keep the intermediates in registers to allow CSE to combine
1236      consecutive bitfield assignments.  */
1237
1238   temp = force_reg (mode, op0);
1239
1240   if (! all_one)
1241     {
1242       rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1243       if (reverse)
1244         mask = flip_storage_order (mode, mask);
1245       temp = expand_binop (mode, and_optab, temp, mask,
1246                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1247       temp = force_reg (mode, temp);
1248     }
1249
1250   /* Now logical-or VALUE into OP0, unless it is zero.  */
1251
1252   if (! all_zero)
1253     {
1254       temp = expand_binop (mode, ior_optab, temp, value,
1255                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1256       temp = force_reg (mode, temp);
1257     }
1258
1259   if (op0 != temp)
1260     {
1261       op0 = copy_rtx (op0);
1262       emit_move_insn (op0, temp);
1263     }
1264 }
1265 \f
1266 /* Store a bit field that is split across multiple accessible memory objects.
1267
1268    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1269    BITSIZE is the field width; BITPOS the position of its first bit
1270    (within the word).
1271    VALUE is the value to store.
1272
1273    If REVERSE is true, the store is to be done in reverse order.
1274
1275    This does not yet handle fields wider than BITS_PER_WORD.  */
1276
1277 static void
1278 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1279                        unsigned HOST_WIDE_INT bitpos,
1280                        unsigned HOST_WIDE_INT bitregion_start,
1281                        unsigned HOST_WIDE_INT bitregion_end,
1282                        rtx value, bool reverse)
1283 {
1284   unsigned int unit, total_bits, bitsdone = 0;
1285
1286   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1287      much at a time.  */
1288   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1289     unit = BITS_PER_WORD;
1290   else
1291     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1292
1293   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1294      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1295      again, and we will mutually recurse forever.  */
1296   if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1297     unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1298
1299   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1300      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1301      that VALUE might be a floating-point constant.  */
1302   if (CONSTANT_P (value) && !CONST_INT_P (value))
1303     {
1304       rtx word = gen_lowpart_common (word_mode, value);
1305
1306       if (word && (value != word))
1307         value = word;
1308       else
1309         value = gen_lowpart_common (word_mode,
1310                                     force_reg (GET_MODE (value) != VOIDmode
1311                                                ? GET_MODE (value)
1312                                                : word_mode, value));
1313     }
1314
1315   total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1316
1317   while (bitsdone < bitsize)
1318     {
1319       unsigned HOST_WIDE_INT thissize;
1320       unsigned HOST_WIDE_INT thispos;
1321       unsigned HOST_WIDE_INT offset;
1322       rtx part, word;
1323
1324       offset = (bitpos + bitsdone) / unit;
1325       thispos = (bitpos + bitsdone) % unit;
1326
1327       /* When region of bytes we can touch is restricted, decrease
1328          UNIT close to the end of the region as needed.  If op0 is a REG
1329          or SUBREG of REG, don't do this, as there can't be data races
1330          on a register and we can expand shorter code in some cases.  */
1331       if (bitregion_end
1332           && unit > BITS_PER_UNIT
1333           && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1334           && !REG_P (op0)
1335           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1336         {
1337           unit = unit / 2;
1338           continue;
1339         }
1340
1341       /* THISSIZE must not overrun a word boundary.  Otherwise,
1342          store_fixed_bit_field will call us again, and we will mutually
1343          recurse forever.  */
1344       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1345       thissize = MIN (thissize, unit - thispos);
1346
1347       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1348         {
1349           /* Fetch successively less significant portions.  */
1350           if (CONST_INT_P (value))
1351             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1352                              >> (bitsize - bitsdone - thissize))
1353                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1354           /* Likewise, but the source is little-endian.  */
1355           else if (reverse)
1356             part = extract_fixed_bit_field (word_mode, value, thissize,
1357                                             bitsize - bitsdone - thissize,
1358                                             NULL_RTX, 1, false);
1359           else
1360             {
1361               int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1362               /* The args are chosen so that the last part includes the
1363                  lsb.  Give extract_bit_field the value it needs (with
1364                  endianness compensation) to fetch the piece we want.  */
1365               part = extract_fixed_bit_field (word_mode, value, thissize,
1366                                               total_bits - bitsize + bitsdone,
1367                                               NULL_RTX, 1, false);
1368             }
1369         }
1370       else
1371         {
1372           /* Fetch successively more significant portions.  */
1373           if (CONST_INT_P (value))
1374             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1375                              >> bitsdone)
1376                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1377           /* Likewise, but the source is big-endian.  */
1378           else if (reverse)
1379             part = extract_fixed_bit_field (word_mode, value, thissize,
1380                                             total_bits - bitsdone - thissize,
1381                                             NULL_RTX, 1, false);
1382           else
1383             part = extract_fixed_bit_field (word_mode, value, thissize,
1384                                             bitsdone, NULL_RTX, 1, false);
1385         }
1386
1387       /* If OP0 is a register, then handle OFFSET here.  */
1388       if (SUBREG_P (op0) || REG_P (op0))
1389         {
1390           machine_mode op0_mode = GET_MODE (op0);
1391           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1392             word = offset ? const0_rtx : op0;
1393           else
1394             word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1395                                           GET_MODE (op0));
1396           offset &= BITS_PER_WORD / unit - 1;
1397         }
1398       else
1399         word = op0;
1400
1401       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1402          it is just an out-of-bounds access.  Ignore it.  */
1403       if (word != const0_rtx)
1404         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1405                                bitregion_start, bitregion_end, part,
1406                                reverse);
1407       bitsdone += thissize;
1408     }
1409 }
1410 \f
1411 /* A subroutine of extract_bit_field_1 that converts return value X
1412    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1413    to extract_bit_field.  */
1414
1415 static rtx
1416 convert_extracted_bit_field (rtx x, machine_mode mode,
1417                              machine_mode tmode, bool unsignedp)
1418 {
1419   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1420     return x;
1421
1422   /* If the x mode is not a scalar integral, first convert to the
1423      integer mode of that size and then access it as a floating-point
1424      value via a SUBREG.  */
1425   if (!SCALAR_INT_MODE_P (tmode))
1426     {
1427       machine_mode smode;
1428
1429       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1430       x = convert_to_mode (smode, x, unsignedp);
1431       x = force_reg (smode, x);
1432       return gen_lowpart (tmode, x);
1433     }
1434
1435   return convert_to_mode (tmode, x, unsignedp);
1436 }
1437
1438 /* Try to use an ext(z)v pattern to extract a field from OP0.
1439    Return the extracted value on success, otherwise return null.
1440    EXT_MODE is the mode of the extraction and the other arguments
1441    are as for extract_bit_field.  */
1442
1443 static rtx
1444 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1445                               unsigned HOST_WIDE_INT bitsize,
1446                               unsigned HOST_WIDE_INT bitnum,
1447                               int unsignedp, rtx target,
1448                               machine_mode mode, machine_mode tmode)
1449 {
1450   struct expand_operand ops[4];
1451   rtx spec_target = target;
1452   rtx spec_target_subreg = 0;
1453   machine_mode ext_mode = extv->field_mode;
1454   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1455
1456   if (bitsize == 0 || unit < bitsize)
1457     return NULL_RTX;
1458
1459   if (MEM_P (op0))
1460     /* Get a reference to the first byte of the field.  */
1461     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1462                                 &bitnum);
1463   else
1464     {
1465       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1466       if (BYTES_BIG_ENDIAN)
1467         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1468
1469       /* If op0 is a register, we need it in EXT_MODE to make it
1470          acceptable to the format of ext(z)v.  */
1471       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1472         return NULL_RTX;
1473       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1474         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1475     }
1476
1477   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1478      "backwards" from the size of the unit we are extracting from.
1479      Otherwise, we count bits from the most significant on a
1480      BYTES/BITS_BIG_ENDIAN machine.  */
1481
1482   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1483     bitnum = unit - bitsize - bitnum;
1484
1485   if (target == 0)
1486     target = spec_target = gen_reg_rtx (tmode);
1487
1488   if (GET_MODE (target) != ext_mode)
1489     {
1490       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1491          between the mode of the extraction (word_mode) and the target
1492          mode.  Instead, create a temporary and use convert_move to set
1493          the target.  */
1494       if (REG_P (target)
1495           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1496         {
1497           target = gen_lowpart (ext_mode, target);
1498           if (GET_MODE_PRECISION (ext_mode)
1499               > GET_MODE_PRECISION (GET_MODE (spec_target)))
1500             spec_target_subreg = target;
1501         }
1502       else
1503         target = gen_reg_rtx (ext_mode);
1504     }
1505
1506   create_output_operand (&ops[0], target, ext_mode);
1507   create_fixed_operand (&ops[1], op0);
1508   create_integer_operand (&ops[2], bitsize);
1509   create_integer_operand (&ops[3], bitnum);
1510   if (maybe_expand_insn (extv->icode, 4, ops))
1511     {
1512       target = ops[0].value;
1513       if (target == spec_target)
1514         return target;
1515       if (target == spec_target_subreg)
1516         return spec_target;
1517       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1518     }
1519   return NULL_RTX;
1520 }
1521
1522 /* A subroutine of extract_bit_field, with the same arguments.
1523    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1524    if we can find no other means of implementing the operation.
1525    if FALLBACK_P is false, return NULL instead.  */
1526
1527 static rtx
1528 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1529                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1530                      machine_mode mode, machine_mode tmode,
1531                      bool reverse, bool fallback_p)
1532 {
1533   rtx op0 = str_rtx;
1534   machine_mode int_mode;
1535   machine_mode mode1;
1536
1537   if (tmode == VOIDmode)
1538     tmode = mode;
1539
1540   while (GET_CODE (op0) == SUBREG)
1541     {
1542       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1543       op0 = SUBREG_REG (op0);
1544     }
1545
1546   /* If we have an out-of-bounds access to a register, just return an
1547      uninitialized register of the required mode.  This can occur if the
1548      source code contains an out-of-bounds access to a small array.  */
1549   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1550     return gen_reg_rtx (tmode);
1551
1552   if (REG_P (op0)
1553       && mode == GET_MODE (op0)
1554       && bitnum == 0
1555       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1556     {
1557       if (reverse)
1558         op0 = flip_storage_order (mode, op0);
1559       /* We're trying to extract a full register from itself.  */
1560       return op0;
1561     }
1562
1563   /* See if we can get a better vector mode before extracting.  */
1564   if (VECTOR_MODE_P (GET_MODE (op0))
1565       && !MEM_P (op0)
1566       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1567     {
1568       machine_mode new_mode;
1569
1570       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1571         new_mode = MIN_MODE_VECTOR_FLOAT;
1572       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1573         new_mode = MIN_MODE_VECTOR_FRACT;
1574       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1575         new_mode = MIN_MODE_VECTOR_UFRACT;
1576       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1577         new_mode = MIN_MODE_VECTOR_ACCUM;
1578       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1579         new_mode = MIN_MODE_VECTOR_UACCUM;
1580       else
1581         new_mode = MIN_MODE_VECTOR_INT;
1582
1583       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1584         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1585             && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode)
1586             && targetm.vector_mode_supported_p (new_mode))
1587           break;
1588       if (new_mode != VOIDmode)
1589         op0 = gen_lowpart (new_mode, op0);
1590     }
1591
1592   /* Use vec_extract patterns for extracting parts of vectors whenever
1593      available.  */
1594   if (VECTOR_MODE_P (GET_MODE (op0))
1595       && !MEM_P (op0)
1596       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1597       && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1598           == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1599     {
1600       struct expand_operand ops[3];
1601       machine_mode outermode = GET_MODE (op0);
1602       machine_mode innermode = GET_MODE_INNER (outermode);
1603       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1604       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1605
1606       create_output_operand (&ops[0], target, innermode);
1607       create_input_operand (&ops[1], op0, outermode);
1608       create_integer_operand (&ops[2], pos);
1609       if (maybe_expand_insn (icode, 3, ops))
1610         {
1611           target = ops[0].value;
1612           if (GET_MODE (target) != mode)
1613             return gen_lowpart (tmode, target);
1614           return target;
1615         }
1616     }
1617
1618   /* Make sure we are playing with integral modes.  Pun with subregs
1619      if we aren't.  */
1620   {
1621     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1622     if (imode != GET_MODE (op0))
1623       {
1624         if (MEM_P (op0))
1625           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1626         else if (imode != BLKmode)
1627           {
1628             op0 = gen_lowpart (imode, op0);
1629
1630             /* If we got a SUBREG, force it into a register since we
1631                aren't going to be able to do another SUBREG on it.  */
1632             if (GET_CODE (op0) == SUBREG)
1633               op0 = force_reg (imode, op0);
1634           }
1635         else
1636           {
1637             HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1638             rtx mem = assign_stack_temp (GET_MODE (op0), size);
1639             emit_move_insn (mem, op0);
1640             op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1641           }
1642       }
1643   }
1644
1645   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1646      If that's wrong, the solution is to test for it and set TARGET to 0
1647      if needed.  */
1648
1649   /* Get the mode of the field to use for atomic access or subreg
1650      conversion.  */
1651   mode1 = mode;
1652   if (SCALAR_INT_MODE_P (tmode))
1653     {
1654       machine_mode try_mode = mode_for_size (bitsize,
1655                                                   GET_MODE_CLASS (tmode), 0);
1656       if (try_mode != BLKmode)
1657         mode1 = try_mode;
1658     }
1659   gcc_assert (mode1 != BLKmode);
1660
1661   /* Extraction of a full MODE1 value can be done with a subreg as long
1662      as the least significant bit of the value is the least significant
1663      bit of either OP0 or a word of OP0.  */
1664   if (!MEM_P (op0)
1665       && !reverse
1666       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1667       && bitsize == GET_MODE_BITSIZE (mode1)
1668       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1669     {
1670       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1671                                      bitnum / BITS_PER_UNIT);
1672       if (sub)
1673         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1674     }
1675
1676   /* Extraction of a full MODE1 value can be done with a load as long as
1677      the field is on a byte boundary and is sufficiently aligned.  */
1678   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1679     {
1680       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1681       if (reverse)
1682         op0 = flip_storage_order (mode1, op0);
1683       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1684     }
1685
1686   /* Handle fields bigger than a word.  */
1687
1688   if (bitsize > BITS_PER_WORD)
1689     {
1690       /* Here we transfer the words of the field
1691          in the order least significant first.
1692          This is because the most significant word is the one which may
1693          be less than full.  */
1694
1695       const bool backwards = WORDS_BIG_ENDIAN;
1696       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1697       unsigned int i;
1698       rtx_insn *last;
1699
1700       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1701         target = gen_reg_rtx (mode);
1702
1703       /* In case we're about to clobber a base register or something
1704          (see gcc.c-torture/execute/20040625-1.c).   */
1705       if (reg_mentioned_p (target, str_rtx))
1706         target = gen_reg_rtx (mode);
1707
1708       /* Indicate for flow that the entire target reg is being set.  */
1709       emit_clobber (target);
1710
1711       last = get_last_insn ();
1712       for (i = 0; i < nwords; i++)
1713         {
1714           /* If I is 0, use the low-order word in both field and target;
1715              if I is 1, use the next to lowest word; and so on.  */
1716           /* Word number in TARGET to use.  */
1717           unsigned int wordnum
1718             = (backwards
1719                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1720                : i);
1721           /* Offset from start of field in OP0.  */
1722           unsigned int bit_offset = (backwards ^ reverse
1723                                      ? MAX ((int) bitsize - ((int) i + 1)
1724                                             * BITS_PER_WORD,
1725                                             0)
1726                                      : (int) i * BITS_PER_WORD);
1727           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1728           rtx result_part
1729             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1730                                              bitsize - i * BITS_PER_WORD),
1731                                    bitnum + bit_offset, 1, target_part,
1732                                    mode, word_mode, reverse, fallback_p);
1733
1734           gcc_assert (target_part);
1735           if (!result_part)
1736             {
1737               delete_insns_since (last);
1738               return NULL;
1739             }
1740
1741           if (result_part != target_part)
1742             emit_move_insn (target_part, result_part);
1743         }
1744
1745       if (unsignedp)
1746         {
1747           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1748              need to be zero'd out.  */
1749           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1750             {
1751               unsigned int i, total_words;
1752
1753               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1754               for (i = nwords; i < total_words; i++)
1755                 emit_move_insn
1756                   (operand_subword (target,
1757                                     backwards ? total_words - i - 1 : i,
1758                                     1, VOIDmode),
1759                    const0_rtx);
1760             }
1761           return target;
1762         }
1763
1764       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1765       target = expand_shift (LSHIFT_EXPR, mode, target,
1766                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1767       return expand_shift (RSHIFT_EXPR, mode, target,
1768                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1769     }
1770
1771   /* If OP0 is a multi-word register, narrow it to the affected word.
1772      If the region spans two words, defer to extract_split_bit_field.  */
1773   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1774     {
1775       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1776         {
1777           if (!fallback_p)
1778             return NULL_RTX;
1779           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1780                                             reverse);
1781           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1782         }
1783       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1784                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1785       bitnum %= BITS_PER_WORD;
1786     }
1787
1788   /* From here on we know the desired field is smaller than a word.
1789      If OP0 is a register, it too fits within a word.  */
1790   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1791   extraction_insn extv;
1792   if (!MEM_P (op0)
1793       && !reverse
1794       /* ??? We could limit the structure size to the part of OP0 that
1795          contains the field, with appropriate checks for endianness
1796          and TRULY_NOOP_TRUNCATION.  */
1797       && get_best_reg_extraction_insn (&extv, pattern,
1798                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1799                                        tmode))
1800     {
1801       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1802                                                  unsignedp, target, mode,
1803                                                  tmode);
1804       if (result)
1805         return result;
1806     }
1807
1808   /* If OP0 is a memory, try copying it to a register and seeing if a
1809      cheap register alternative is available.  */
1810   if (MEM_P (op0) & !reverse)
1811     {
1812       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1813                                         tmode))
1814         {
1815           rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1816                                                      bitnum, unsignedp,
1817                                                      target, mode,
1818                                                      tmode);
1819           if (result)
1820             return result;
1821         }
1822
1823       rtx_insn *last = get_last_insn ();
1824
1825       /* Try loading part of OP0 into a register and extracting the
1826          bitfield from that.  */
1827       unsigned HOST_WIDE_INT bitpos;
1828       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1829                                                0, 0, tmode, &bitpos);
1830       if (xop0)
1831         {
1832           xop0 = copy_to_reg (xop0);
1833           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1834                                             unsignedp, target,
1835                                             mode, tmode, reverse, false);
1836           if (result)
1837             return result;
1838           delete_insns_since (last);
1839         }
1840     }
1841
1842   if (!fallback_p)
1843     return NULL;
1844
1845   /* Find a correspondingly-sized integer field, so we can apply
1846      shifts and masks to it.  */
1847   int_mode = int_mode_for_mode (tmode);
1848   if (int_mode == BLKmode)
1849     int_mode = int_mode_for_mode (mode);
1850   /* Should probably push op0 out to memory and then do a load.  */
1851   gcc_assert (int_mode != BLKmode);
1852
1853   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target,
1854                                     unsignedp, reverse);
1855
1856   /* Complex values must be reversed piecewise, so we need to undo the global
1857      reversal, convert to the complex mode and reverse again.  */
1858   if (reverse && COMPLEX_MODE_P (tmode))
1859     {
1860       target = flip_storage_order (int_mode, target);
1861       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1862       target = flip_storage_order (tmode, target);
1863     }
1864   else
1865     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1866
1867   return target;
1868 }
1869
1870 /* Generate code to extract a byte-field from STR_RTX
1871    containing BITSIZE bits, starting at BITNUM,
1872    and put it in TARGET if possible (if TARGET is nonzero).
1873    Regardless of TARGET, we return the rtx for where the value is placed.
1874
1875    STR_RTX is the structure containing the byte (a REG or MEM).
1876    UNSIGNEDP is nonzero if this is an unsigned bit field.
1877    MODE is the natural mode of the field value once extracted.
1878    TMODE is the mode the caller would like the value to have;
1879    but the value may be returned with type MODE instead.
1880
1881    If REVERSE is true, the extraction is to be done in reverse order.
1882
1883    If a TARGET is specified and we can store in it at no extra cost,
1884    we do so, and return TARGET.
1885    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1886    if they are equally easy.  */
1887
1888 rtx
1889 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1890                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1891                    machine_mode mode, machine_mode tmode, bool reverse)
1892 {
1893   machine_mode mode1;
1894
1895   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1896   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1897     mode1 = GET_MODE (str_rtx);
1898   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1899     mode1 = GET_MODE (target);
1900   else
1901     mode1 = tmode;
1902
1903   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1904     {
1905       /* Extraction of a full MODE1 value can be done with a simple load.
1906          We know here that the field can be accessed with one single
1907          instruction.  For targets that support unaligned memory,
1908          an unaligned access may be necessary.  */
1909       if (bitsize == GET_MODE_BITSIZE (mode1))
1910         {
1911           rtx result = adjust_bitfield_address (str_rtx, mode1,
1912                                                 bitnum / BITS_PER_UNIT);
1913           if (reverse)
1914             result = flip_storage_order (mode1, result);
1915           gcc_assert (bitnum % BITS_PER_UNIT == 0);
1916           return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1917         }
1918
1919       str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1920                                       &bitnum);
1921       gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1922       str_rtx = copy_to_reg (str_rtx);
1923     }
1924
1925   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1926                               target, mode, tmode, reverse, true);
1927 }
1928 \f
1929 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1930    from bit BITNUM of OP0.
1931
1932    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1933    If REVERSE is true, the extraction is to be done in reverse order.
1934
1935    If TARGET is nonzero, attempts to store the value there
1936    and return TARGET, but this is not guaranteed.
1937    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1938
1939 static rtx
1940 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1941                          unsigned HOST_WIDE_INT bitsize,
1942                          unsigned HOST_WIDE_INT bitnum, rtx target,
1943                          int unsignedp, bool reverse)
1944 {
1945   if (MEM_P (op0))
1946     {
1947       machine_mode mode
1948         = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1949                          MEM_VOLATILE_P (op0));
1950
1951       if (mode == VOIDmode)
1952         /* The only way this should occur is if the field spans word
1953            boundaries.  */
1954         return extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1955                                         reverse);
1956
1957       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1958     }
1959
1960   return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1961                                     target, unsignedp, reverse);
1962 }
1963
1964 /* Helper function for extract_fixed_bit_field, extracts
1965    the bit field always using the MODE of OP0.  */
1966
1967 static rtx
1968 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1969                            unsigned HOST_WIDE_INT bitsize,
1970                            unsigned HOST_WIDE_INT bitnum, rtx target,
1971                            int unsignedp, bool reverse)
1972 {
1973   machine_mode mode = GET_MODE (op0);
1974   gcc_assert (SCALAR_INT_MODE_P (mode));
1975
1976   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1977      for invalid input, such as extract equivalent of f5 from
1978      gcc.dg/pr48335-2.c.  */
1979
1980   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1981     /* BITNUM is the distance between our msb and that of OP0.
1982        Convert it to the distance from the lsb.  */
1983     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1984
1985   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1986      We have reduced the big-endian case to the little-endian case.  */
1987   if (reverse)
1988     op0 = flip_storage_order (mode, op0);
1989
1990   if (unsignedp)
1991     {
1992       if (bitnum)
1993         {
1994           /* If the field does not already start at the lsb,
1995              shift it so it does.  */
1996           /* Maybe propagate the target for the shift.  */
1997           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1998           if (tmode != mode)
1999             subtarget = 0;
2000           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2001         }
2002       /* Convert the value to the desired mode.  */
2003       if (mode != tmode)
2004         op0 = convert_to_mode (tmode, op0, 1);
2005
2006       /* Unless the msb of the field used to be the msb when we shifted,
2007          mask out the upper bits.  */
2008
2009       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2010         return expand_binop (GET_MODE (op0), and_optab, op0,
2011                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
2012                              target, 1, OPTAB_LIB_WIDEN);
2013       return op0;
2014     }
2015
2016   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2017      then arithmetic-shift its lsb to the lsb of the word.  */
2018   op0 = force_reg (mode, op0);
2019
2020   /* Find the narrowest integer mode that contains the field.  */
2021
2022   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
2023        mode = GET_MODE_WIDER_MODE (mode))
2024     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
2025       {
2026         op0 = convert_to_mode (mode, op0, 0);
2027         break;
2028       }
2029
2030   if (mode != tmode)
2031     target = 0;
2032
2033   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2034     {
2035       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2036       /* Maybe propagate the target for the shift.  */
2037       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2038       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2039     }
2040
2041   return expand_shift (RSHIFT_EXPR, mode, op0,
2042                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2043 }
2044
2045 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2046    VALUE << BITPOS.  */
2047
2048 static rtx
2049 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2050               int bitpos)
2051 {
2052   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2053 }
2054 \f
2055 /* Extract a bit field that is split across two words
2056    and return an RTX for the result.
2057
2058    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2059    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2060    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2061
2062    If REVERSE is true, the extraction is to be done in reverse order.  */
2063
2064 static rtx
2065 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2066                          unsigned HOST_WIDE_INT bitpos, int unsignedp,
2067                          bool reverse)
2068 {
2069   unsigned int unit;
2070   unsigned int bitsdone = 0;
2071   rtx result = NULL_RTX;
2072   int first = 1;
2073
2074   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2075      much at a time.  */
2076   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2077     unit = BITS_PER_WORD;
2078   else
2079     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2080
2081   while (bitsdone < bitsize)
2082     {
2083       unsigned HOST_WIDE_INT thissize;
2084       rtx part, word;
2085       unsigned HOST_WIDE_INT thispos;
2086       unsigned HOST_WIDE_INT offset;
2087
2088       offset = (bitpos + bitsdone) / unit;
2089       thispos = (bitpos + bitsdone) % unit;
2090
2091       /* THISSIZE must not overrun a word boundary.  Otherwise,
2092          extract_fixed_bit_field will call us again, and we will mutually
2093          recurse forever.  */
2094       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2095       thissize = MIN (thissize, unit - thispos);
2096
2097       /* If OP0 is a register, then handle OFFSET here.  */
2098       if (SUBREG_P (op0) || REG_P (op0))
2099         {
2100           word = operand_subword_force (op0, offset, GET_MODE (op0));
2101           offset = 0;
2102         }
2103       else
2104         word = op0;
2105
2106       /* Extract the parts in bit-counting order,
2107          whose meaning is determined by BYTES_PER_UNIT.
2108          OFFSET is in UNITs, and UNIT is in bits.  */
2109       part = extract_fixed_bit_field (word_mode, word, thissize,
2110                                       offset * unit + thispos, 0, 1, reverse);
2111       bitsdone += thissize;
2112
2113       /* Shift this part into place for the result.  */
2114       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2115         {
2116           if (bitsize != bitsdone)
2117             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2118                                  bitsize - bitsdone, 0, 1);
2119         }
2120       else
2121         {
2122           if (bitsdone != thissize)
2123             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2124                                  bitsdone - thissize, 0, 1);
2125         }
2126
2127       if (first)
2128         result = part;
2129       else
2130         /* Combine the parts with bitwise or.  This works
2131            because we extracted each part as an unsigned bit field.  */
2132         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2133                                OPTAB_LIB_WIDEN);
2134
2135       first = 0;
2136     }
2137
2138   /* Unsigned bit field: we are done.  */
2139   if (unsignedp)
2140     return result;
2141   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2142   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2143                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2144   return expand_shift (RSHIFT_EXPR, word_mode, result,
2145                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2146 }
2147 \f
2148 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2149    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2150    MODE, fill the upper bits with zeros.  Fail if the layout of either
2151    mode is unknown (as for CC modes) or if the extraction would involve
2152    unprofitable mode punning.  Return the value on success, otherwise
2153    return null.
2154
2155    This is different from gen_lowpart* in these respects:
2156
2157      - the returned value must always be considered an rvalue
2158
2159      - when MODE is wider than SRC_MODE, the extraction involves
2160        a zero extension
2161
2162      - when MODE is smaller than SRC_MODE, the extraction involves
2163        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2164
2165    In other words, this routine performs a computation, whereas the
2166    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2167    operations.  */
2168
2169 rtx
2170 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2171 {
2172   machine_mode int_mode, src_int_mode;
2173
2174   if (mode == src_mode)
2175     return src;
2176
2177   if (CONSTANT_P (src))
2178     {
2179       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2180          fails, it will happily create (subreg (symbol_ref)) or similar
2181          invalid SUBREGs.  */
2182       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2183       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2184       if (ret)
2185         return ret;
2186
2187       if (GET_MODE (src) == VOIDmode
2188           || !validate_subreg (mode, src_mode, src, byte))
2189         return NULL_RTX;
2190
2191       src = force_reg (GET_MODE (src), src);
2192       return gen_rtx_SUBREG (mode, src, byte);
2193     }
2194
2195   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2196     return NULL_RTX;
2197
2198   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2199       && MODES_TIEABLE_P (mode, src_mode))
2200     {
2201       rtx x = gen_lowpart_common (mode, src);
2202       if (x)
2203         return x;
2204     }
2205
2206   src_int_mode = int_mode_for_mode (src_mode);
2207   int_mode = int_mode_for_mode (mode);
2208   if (src_int_mode == BLKmode || int_mode == BLKmode)
2209     return NULL_RTX;
2210
2211   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2212     return NULL_RTX;
2213   if (!MODES_TIEABLE_P (int_mode, mode))
2214     return NULL_RTX;
2215
2216   src = gen_lowpart (src_int_mode, src);
2217   src = convert_modes (int_mode, src_int_mode, src, true);
2218   src = gen_lowpart (mode, src);
2219   return src;
2220 }
2221 \f
2222 /* Add INC into TARGET.  */
2223
2224 void
2225 expand_inc (rtx target, rtx inc)
2226 {
2227   rtx value = expand_binop (GET_MODE (target), add_optab,
2228                             target, inc,
2229                             target, 0, OPTAB_LIB_WIDEN);
2230   if (value != target)
2231     emit_move_insn (target, value);
2232 }
2233
2234 /* Subtract DEC from TARGET.  */
2235
2236 void
2237 expand_dec (rtx target, rtx dec)
2238 {
2239   rtx value = expand_binop (GET_MODE (target), sub_optab,
2240                             target, dec,
2241                             target, 0, OPTAB_LIB_WIDEN);
2242   if (value != target)
2243     emit_move_insn (target, value);
2244 }
2245 \f
2246 /* Output a shift instruction for expression code CODE,
2247    with SHIFTED being the rtx for the value to shift,
2248    and AMOUNT the rtx for the amount to shift by.
2249    Store the result in the rtx TARGET, if that is convenient.
2250    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2251    Return the rtx for where the value is.
2252    If that cannot be done, abort the compilation unless MAY_FAIL is true,
2253    in which case 0 is returned.  */
2254
2255 static rtx
2256 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2257                 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2258 {
2259   rtx op1, temp = 0;
2260   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2261   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2262   optab lshift_optab = ashl_optab;
2263   optab rshift_arith_optab = ashr_optab;
2264   optab rshift_uns_optab = lshr_optab;
2265   optab lrotate_optab = rotl_optab;
2266   optab rrotate_optab = rotr_optab;
2267   machine_mode op1_mode;
2268   machine_mode scalar_mode = mode;
2269   int attempt;
2270   bool speed = optimize_insn_for_speed_p ();
2271
2272   if (VECTOR_MODE_P (mode))
2273     scalar_mode = GET_MODE_INNER (mode);
2274   op1 = amount;
2275   op1_mode = GET_MODE (op1);
2276
2277   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2278      shift amount is a vector, use the vector/vector shift patterns.  */
2279   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2280     {
2281       lshift_optab = vashl_optab;
2282       rshift_arith_optab = vashr_optab;
2283       rshift_uns_optab = vlshr_optab;
2284       lrotate_optab = vrotl_optab;
2285       rrotate_optab = vrotr_optab;
2286     }
2287
2288   /* Previously detected shift-counts computed by NEGATE_EXPR
2289      and shifted in the other direction; but that does not work
2290      on all machines.  */
2291
2292   if (SHIFT_COUNT_TRUNCATED)
2293     {
2294       if (CONST_INT_P (op1)
2295           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2296               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2297         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2298                        % GET_MODE_BITSIZE (scalar_mode));
2299       else if (GET_CODE (op1) == SUBREG
2300                && subreg_lowpart_p (op1)
2301                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2302                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2303         op1 = SUBREG_REG (op1);
2304     }
2305
2306   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2307      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2308      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2309      amount instead.  */
2310   if (rotate
2311       && CONST_INT_P (op1)
2312       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2313                    GET_MODE_BITSIZE (scalar_mode) - 1))
2314     {
2315       op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2316       left = !left;
2317       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2318     }
2319
2320   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2321      Note that this is not the case for bigger values.  For instance a rotation
2322      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2323      0x04030201 (bswapsi).  */
2324   if (rotate
2325       && CONST_INT_P (op1)
2326       && INTVAL (op1) == BITS_PER_UNIT
2327       && GET_MODE_SIZE (scalar_mode) == 2
2328       && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2329     return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2330                                   unsignedp);
2331
2332   if (op1 == const0_rtx)
2333     return shifted;
2334
2335   /* Check whether its cheaper to implement a left shift by a constant
2336      bit count by a sequence of additions.  */
2337   if (code == LSHIFT_EXPR
2338       && CONST_INT_P (op1)
2339       && INTVAL (op1) > 0
2340       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2341       && INTVAL (op1) < MAX_BITS_PER_WORD
2342       && (shift_cost (speed, mode, INTVAL (op1))
2343           > INTVAL (op1) * add_cost (speed, mode))
2344       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2345     {
2346       int i;
2347       for (i = 0; i < INTVAL (op1); i++)
2348         {
2349           temp = force_reg (mode, shifted);
2350           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2351                                   unsignedp, OPTAB_LIB_WIDEN);
2352         }
2353       return shifted;
2354     }
2355
2356   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2357     {
2358       enum optab_methods methods;
2359
2360       if (attempt == 0)
2361         methods = OPTAB_DIRECT;
2362       else if (attempt == 1)
2363         methods = OPTAB_WIDEN;
2364       else
2365         methods = OPTAB_LIB_WIDEN;
2366
2367       if (rotate)
2368         {
2369           /* Widening does not work for rotation.  */
2370           if (methods == OPTAB_WIDEN)
2371             continue;
2372           else if (methods == OPTAB_LIB_WIDEN)
2373             {
2374               /* If we have been unable to open-code this by a rotation,
2375                  do it as the IOR of two shifts.  I.e., to rotate A
2376                  by N bits, compute
2377                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2378                  where C is the bitsize of A.
2379
2380                  It is theoretically possible that the target machine might
2381                  not be able to perform either shift and hence we would
2382                  be making two libcalls rather than just the one for the
2383                  shift (similarly if IOR could not be done).  We will allow
2384                  this extremely unlikely lossage to avoid complicating the
2385                  code below.  */
2386
2387               rtx subtarget = target == shifted ? 0 : target;
2388               rtx new_amount, other_amount;
2389               rtx temp1;
2390
2391               new_amount = op1;
2392               if (op1 == const0_rtx)
2393                 return shifted;
2394               else if (CONST_INT_P (op1))
2395                 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2396                                         - INTVAL (op1));
2397               else
2398                 {
2399                   other_amount
2400                     = simplify_gen_unary (NEG, GET_MODE (op1),
2401                                           op1, GET_MODE (op1));
2402                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2403                   other_amount
2404                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2405                                            gen_int_mode (mask, GET_MODE (op1)));
2406                 }
2407
2408               shifted = force_reg (mode, shifted);
2409
2410               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2411                                      mode, shifted, new_amount, 0, 1);
2412               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2413                                       mode, shifted, other_amount,
2414                                       subtarget, 1);
2415               return expand_binop (mode, ior_optab, temp, temp1, target,
2416                                    unsignedp, methods);
2417             }
2418
2419           temp = expand_binop (mode,
2420                                left ? lrotate_optab : rrotate_optab,
2421                                shifted, op1, target, unsignedp, methods);
2422         }
2423       else if (unsignedp)
2424         temp = expand_binop (mode,
2425                              left ? lshift_optab : rshift_uns_optab,
2426                              shifted, op1, target, unsignedp, methods);
2427
2428       /* Do arithmetic shifts.
2429          Also, if we are going to widen the operand, we can just as well
2430          use an arithmetic right-shift instead of a logical one.  */
2431       if (temp == 0 && ! rotate
2432           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2433         {
2434           enum optab_methods methods1 = methods;
2435
2436           /* If trying to widen a log shift to an arithmetic shift,
2437              don't accept an arithmetic shift of the same size.  */
2438           if (unsignedp)
2439             methods1 = OPTAB_MUST_WIDEN;
2440
2441           /* Arithmetic shift */
2442
2443           temp = expand_binop (mode,
2444                                left ? lshift_optab : rshift_arith_optab,
2445                                shifted, op1, target, unsignedp, methods1);
2446         }
2447
2448       /* We used to try extzv here for logical right shifts, but that was
2449          only useful for one machine, the VAX, and caused poor code
2450          generation there for lshrdi3, so the code was deleted and a
2451          define_expand for lshrsi3 was added to vax.md.  */
2452     }
2453
2454   gcc_assert (temp != NULL_RTX || may_fail);
2455   return temp;
2456 }
2457
2458 /* Output a shift instruction for expression code CODE,
2459    with SHIFTED being the rtx for the value to shift,
2460    and AMOUNT the amount to shift by.
2461    Store the result in the rtx TARGET, if that is convenient.
2462    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2463    Return the rtx for where the value is.  */
2464
2465 rtx
2466 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2467               int amount, rtx target, int unsignedp)
2468 {
2469   return expand_shift_1 (code, mode,
2470                          shifted, GEN_INT (amount), target, unsignedp);
2471 }
2472
2473 /* Likewise, but return 0 if that cannot be done.  */
2474
2475 static rtx
2476 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2477                     int amount, rtx target, int unsignedp)
2478 {
2479   return expand_shift_1 (code, mode,
2480                          shifted, GEN_INT (amount), target, unsignedp, true);
2481 }
2482
2483 /* Output a shift instruction for expression code CODE,
2484    with SHIFTED being the rtx for the value to shift,
2485    and AMOUNT the tree for the amount to shift by.
2486    Store the result in the rtx TARGET, if that is convenient.
2487    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2488    Return the rtx for where the value is.  */
2489
2490 rtx
2491 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2492                        tree amount, rtx target, int unsignedp)
2493 {
2494   return expand_shift_1 (code, mode,
2495                          shifted, expand_normal (amount), target, unsignedp);
2496 }
2497
2498 \f
2499 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2500                         const struct mult_cost *, machine_mode mode);
2501 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2502                               const struct algorithm *, enum mult_variant);
2503 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2504 static rtx extract_high_half (machine_mode, rtx);
2505 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2506 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2507                                        int, int);
2508 /* Compute and return the best algorithm for multiplying by T.
2509    The algorithm must cost less than cost_limit
2510    If retval.cost >= COST_LIMIT, no algorithm was found and all
2511    other field of the returned struct are undefined.
2512    MODE is the machine mode of the multiplication.  */
2513
2514 static void
2515 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2516             const struct mult_cost *cost_limit, machine_mode mode)
2517 {
2518   int m;
2519   struct algorithm *alg_in, *best_alg;
2520   struct mult_cost best_cost;
2521   struct mult_cost new_limit;
2522   int op_cost, op_latency;
2523   unsigned HOST_WIDE_INT orig_t = t;
2524   unsigned HOST_WIDE_INT q;
2525   int maxm, hash_index;
2526   bool cache_hit = false;
2527   enum alg_code cache_alg = alg_zero;
2528   bool speed = optimize_insn_for_speed_p ();
2529   machine_mode imode;
2530   struct alg_hash_entry *entry_ptr;
2531
2532   /* Indicate that no algorithm is yet found.  If no algorithm
2533      is found, this value will be returned and indicate failure.  */
2534   alg_out->cost.cost = cost_limit->cost + 1;
2535   alg_out->cost.latency = cost_limit->latency + 1;
2536
2537   if (cost_limit->cost < 0
2538       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2539     return;
2540
2541   /* Be prepared for vector modes.  */
2542   imode = GET_MODE_INNER (mode);
2543
2544   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2545
2546   /* Restrict the bits of "t" to the multiplication's mode.  */
2547   t &= GET_MODE_MASK (imode);
2548
2549   /* t == 1 can be done in zero cost.  */
2550   if (t == 1)
2551     {
2552       alg_out->ops = 1;
2553       alg_out->cost.cost = 0;
2554       alg_out->cost.latency = 0;
2555       alg_out->op[0] = alg_m;
2556       return;
2557     }
2558
2559   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2560      fail now.  */
2561   if (t == 0)
2562     {
2563       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2564         return;
2565       else
2566         {
2567           alg_out->ops = 1;
2568           alg_out->cost.cost = zero_cost (speed);
2569           alg_out->cost.latency = zero_cost (speed);
2570           alg_out->op[0] = alg_zero;
2571           return;
2572         }
2573     }
2574
2575   /* We'll be needing a couple extra algorithm structures now.  */
2576
2577   alg_in = XALLOCA (struct algorithm);
2578   best_alg = XALLOCA (struct algorithm);
2579   best_cost = *cost_limit;
2580
2581   /* Compute the hash index.  */
2582   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2583
2584   /* See if we already know what to do for T.  */
2585   entry_ptr = alg_hash_entry_ptr (hash_index);
2586   if (entry_ptr->t == t
2587       && entry_ptr->mode == mode
2588       && entry_ptr->speed == speed
2589       && entry_ptr->alg != alg_unknown)
2590     {
2591       cache_alg = entry_ptr->alg;
2592
2593       if (cache_alg == alg_impossible)
2594         {
2595           /* The cache tells us that it's impossible to synthesize
2596              multiplication by T within entry_ptr->cost.  */
2597           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2598             /* COST_LIMIT is at least as restrictive as the one
2599                recorded in the hash table, in which case we have no
2600                hope of synthesizing a multiplication.  Just
2601                return.  */
2602             return;
2603
2604           /* If we get here, COST_LIMIT is less restrictive than the
2605              one recorded in the hash table, so we may be able to
2606              synthesize a multiplication.  Proceed as if we didn't
2607              have the cache entry.  */
2608         }
2609       else
2610         {
2611           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2612             /* The cached algorithm shows that this multiplication
2613                requires more cost than COST_LIMIT.  Just return.  This
2614                way, we don't clobber this cache entry with
2615                alg_impossible but retain useful information.  */
2616             return;
2617
2618           cache_hit = true;
2619
2620           switch (cache_alg)
2621             {
2622             case alg_shift:
2623               goto do_alg_shift;
2624
2625             case alg_add_t_m2:
2626             case alg_sub_t_m2:
2627               goto do_alg_addsub_t_m2;
2628
2629             case alg_add_factor:
2630             case alg_sub_factor:
2631               goto do_alg_addsub_factor;
2632
2633             case alg_add_t2_m:
2634               goto do_alg_add_t2_m;
2635
2636             case alg_sub_t2_m:
2637               goto do_alg_sub_t2_m;
2638
2639             default:
2640               gcc_unreachable ();
2641             }
2642         }
2643     }
2644
2645   /* If we have a group of zero bits at the low-order part of T, try
2646      multiplying by the remaining bits and then doing a shift.  */
2647
2648   if ((t & 1) == 0)
2649     {
2650     do_alg_shift:
2651       m = ctz_or_zero (t); /* m = number of low zero bits */
2652       if (m < maxm)
2653         {
2654           q = t >> m;
2655           /* The function expand_shift will choose between a shift and
2656              a sequence of additions, so the observed cost is given as
2657              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2658           op_cost = m * add_cost (speed, mode);
2659           if (shift_cost (speed, mode, m) < op_cost)
2660             op_cost = shift_cost (speed, mode, m);
2661           new_limit.cost = best_cost.cost - op_cost;
2662           new_limit.latency = best_cost.latency - op_cost;
2663           synth_mult (alg_in, q, &new_limit, mode);
2664
2665           alg_in->cost.cost += op_cost;
2666           alg_in->cost.latency += op_cost;
2667           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2668             {
2669               best_cost = alg_in->cost;
2670               std::swap (alg_in, best_alg);
2671               best_alg->log[best_alg->ops] = m;
2672               best_alg->op[best_alg->ops] = alg_shift;
2673             }
2674
2675           /* See if treating ORIG_T as a signed number yields a better
2676              sequence.  Try this sequence only for a negative ORIG_T
2677              as it would be useless for a non-negative ORIG_T.  */
2678           if ((HOST_WIDE_INT) orig_t < 0)
2679             {
2680               /* Shift ORIG_T as follows because a right shift of a
2681                  negative-valued signed type is implementation
2682                  defined.  */
2683               q = ~(~orig_t >> m);
2684               /* The function expand_shift will choose between a shift
2685                  and a sequence of additions, so the observed cost is
2686                  given as MIN (m * add_cost(speed, mode),
2687                  shift_cost(speed, mode, m)).  */
2688               op_cost = m * add_cost (speed, mode);
2689               if (shift_cost (speed, mode, m) < op_cost)
2690                 op_cost = shift_cost (speed, mode, m);
2691               new_limit.cost = best_cost.cost - op_cost;
2692               new_limit.latency = best_cost.latency - op_cost;
2693               synth_mult (alg_in, q, &new_limit, mode);
2694
2695               alg_in->cost.cost += op_cost;
2696               alg_in->cost.latency += op_cost;
2697               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2698                 {
2699                   best_cost = alg_in->cost;
2700                   std::swap (alg_in, best_alg);
2701                   best_alg->log[best_alg->ops] = m;
2702                   best_alg->op[best_alg->ops] = alg_shift;
2703                 }
2704             }
2705         }
2706       if (cache_hit)
2707         goto done;
2708     }
2709
2710   /* If we have an odd number, add or subtract one.  */
2711   if ((t & 1) != 0)
2712     {
2713       unsigned HOST_WIDE_INT w;
2714
2715     do_alg_addsub_t_m2:
2716       for (w = 1; (w & t) != 0; w <<= 1)
2717         ;
2718       /* If T was -1, then W will be zero after the loop.  This is another
2719          case where T ends with ...111.  Handling this with (T + 1) and
2720          subtract 1 produces slightly better code and results in algorithm
2721          selection much faster than treating it like the ...0111 case
2722          below.  */
2723       if (w == 0
2724           || (w > 2
2725               /* Reject the case where t is 3.
2726                  Thus we prefer addition in that case.  */
2727               && t != 3))
2728         {
2729           /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */
2730
2731           op_cost = add_cost (speed, mode);
2732           new_limit.cost = best_cost.cost - op_cost;
2733           new_limit.latency = best_cost.latency - op_cost;
2734           synth_mult (alg_in, t + 1, &new_limit, mode);
2735
2736           alg_in->cost.cost += op_cost;
2737           alg_in->cost.latency += op_cost;
2738           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2739             {
2740               best_cost = alg_in->cost;
2741               std::swap (alg_in, best_alg);
2742               best_alg->log[best_alg->ops] = 0;
2743               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2744             }
2745         }
2746       else
2747         {
2748           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */
2749
2750           op_cost = add_cost (speed, mode);
2751           new_limit.cost = best_cost.cost - op_cost;
2752           new_limit.latency = best_cost.latency - op_cost;
2753           synth_mult (alg_in, t - 1, &new_limit, mode);
2754
2755           alg_in->cost.cost += op_cost;
2756           alg_in->cost.latency += op_cost;
2757           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2758             {
2759               best_cost = alg_in->cost;
2760               std::swap (alg_in, best_alg);
2761               best_alg->log[best_alg->ops] = 0;
2762               best_alg->op[best_alg->ops] = alg_add_t_m2;
2763             }
2764         }
2765
2766       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2767          quickly with a - a * n for some appropriate constant n.  */
2768       m = exact_log2 (-orig_t + 1);
2769       if (m >= 0 && m < maxm)
2770         {
2771           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2772           /* If the target has a cheap shift-and-subtract insn use
2773              that in preference to a shift insn followed by a sub insn.
2774              Assume that the shift-and-sub is "atomic" with a latency
2775              equal to it's cost, otherwise assume that on superscalar
2776              hardware the shift may be executed concurrently with the
2777              earlier steps in the algorithm.  */
2778           if (shiftsub1_cost (speed, mode, m) <= op_cost)
2779             {
2780               op_cost = shiftsub1_cost (speed, mode, m);
2781               op_latency = op_cost;
2782             }
2783           else
2784             op_latency = add_cost (speed, mode);
2785
2786           new_limit.cost = best_cost.cost - op_cost;
2787           new_limit.latency = best_cost.latency - op_latency;
2788           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2789                       &new_limit, mode);
2790
2791           alg_in->cost.cost += op_cost;
2792           alg_in->cost.latency += op_latency;
2793           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2794             {
2795               best_cost = alg_in->cost;
2796               std::swap (alg_in, best_alg);
2797               best_alg->log[best_alg->ops] = m;
2798               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2799             }
2800         }
2801
2802       if (cache_hit)
2803         goto done;
2804     }
2805
2806   /* Look for factors of t of the form
2807      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2808      If we find such a factor, we can multiply by t using an algorithm that
2809      multiplies by q, shift the result by m and add/subtract it to itself.
2810
2811      We search for large factors first and loop down, even if large factors
2812      are less probable than small; if we find a large factor we will find a
2813      good sequence quickly, and therefore be able to prune (by decreasing
2814      COST_LIMIT) the search.  */
2815
2816  do_alg_addsub_factor:
2817   for (m = floor_log2 (t - 1); m >= 2; m--)
2818     {
2819       unsigned HOST_WIDE_INT d;
2820
2821       d = (HOST_WIDE_INT_1U << m) + 1;
2822       if (t % d == 0 && t > d && m < maxm
2823           && (!cache_hit || cache_alg == alg_add_factor))
2824         {
2825           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2826           if (shiftadd_cost (speed, mode, m) <= op_cost)
2827             op_cost = shiftadd_cost (speed, mode, m);
2828
2829           op_latency = op_cost;
2830
2831
2832           new_limit.cost = best_cost.cost - op_cost;
2833           new_limit.latency = best_cost.latency - op_latency;
2834           synth_mult (alg_in, t / d, &new_limit, mode);
2835
2836           alg_in->cost.cost += op_cost;
2837           alg_in->cost.latency += op_latency;
2838           if (alg_in->cost.latency < op_cost)
2839             alg_in->cost.latency = op_cost;
2840           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2841             {
2842               best_cost = alg_in->cost;
2843               std::swap (alg_in, best_alg);
2844               best_alg->log[best_alg->ops] = m;
2845               best_alg->op[best_alg->ops] = alg_add_factor;
2846             }
2847           /* Other factors will have been taken care of in the recursion.  */
2848           break;
2849         }
2850
2851       d = (HOST_WIDE_INT_1U << m) - 1;
2852       if (t % d == 0 && t > d && m < maxm
2853           && (!cache_hit || cache_alg == alg_sub_factor))
2854         {
2855           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2856           if (shiftsub0_cost (speed, mode, m) <= op_cost)
2857             op_cost = shiftsub0_cost (speed, mode, m);
2858
2859           op_latency = op_cost;
2860
2861           new_limit.cost = best_cost.cost - op_cost;
2862           new_limit.latency = best_cost.latency - op_latency;
2863           synth_mult (alg_in, t / d, &new_limit, mode);
2864
2865           alg_in->cost.cost += op_cost;
2866           alg_in->cost.latency += op_latency;
2867           if (alg_in->cost.latency < op_cost)
2868             alg_in->cost.latency = op_cost;
2869           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2870             {
2871               best_cost = alg_in->cost;
2872               std::swap (alg_in, best_alg);
2873               best_alg->log[best_alg->ops] = m;
2874               best_alg->op[best_alg->ops] = alg_sub_factor;
2875             }
2876           break;
2877         }
2878     }
2879   if (cache_hit)
2880     goto done;
2881
2882   /* Try shift-and-add (load effective address) instructions,
2883      i.e. do a*3, a*5, a*9.  */
2884   if ((t & 1) != 0)
2885     {
2886     do_alg_add_t2_m:
2887       q = t - 1;
2888       m = ctz_hwi (q);
2889       if (q && m < maxm)
2890         {
2891           op_cost = shiftadd_cost (speed, mode, m);
2892           new_limit.cost = best_cost.cost - op_cost;
2893           new_limit.latency = best_cost.latency - op_cost;
2894           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2895
2896           alg_in->cost.cost += op_cost;
2897           alg_in->cost.latency += op_cost;
2898           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2899             {
2900               best_cost = alg_in->cost;
2901               std::swap (alg_in, best_alg);
2902               best_alg->log[best_alg->ops] = m;
2903               best_alg->op[best_alg->ops] = alg_add_t2_m;
2904             }
2905         }
2906       if (cache_hit)
2907         goto done;
2908
2909     do_alg_sub_t2_m:
2910       q = t + 1;
2911       m = ctz_hwi (q);
2912       if (q && m < maxm)
2913         {
2914           op_cost = shiftsub0_cost (speed, mode, m);
2915           new_limit.cost = best_cost.cost - op_cost;
2916           new_limit.latency = best_cost.latency - op_cost;
2917           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2918
2919           alg_in->cost.cost += op_cost;
2920           alg_in->cost.latency += op_cost;
2921           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2922             {
2923               best_cost = alg_in->cost;
2924               std::swap (alg_in, best_alg);
2925               best_alg->log[best_alg->ops] = m;
2926               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2927             }
2928         }
2929       if (cache_hit)
2930         goto done;
2931     }
2932
2933  done:
2934   /* If best_cost has not decreased, we have not found any algorithm.  */
2935   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2936     {
2937       /* We failed to find an algorithm.  Record alg_impossible for
2938          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2939          we are asked to find an algorithm for T within the same or
2940          lower COST_LIMIT, we can immediately return to the
2941          caller.  */
2942       entry_ptr->t = t;
2943       entry_ptr->mode = mode;
2944       entry_ptr->speed = speed;
2945       entry_ptr->alg = alg_impossible;
2946       entry_ptr->cost = *cost_limit;
2947       return;
2948     }
2949
2950   /* Cache the result.  */
2951   if (!cache_hit)
2952     {
2953       entry_ptr->t = t;
2954       entry_ptr->mode = mode;
2955       entry_ptr->speed = speed;
2956       entry_ptr->alg = best_alg->op[best_alg->ops];
2957       entry_ptr->cost.cost = best_cost.cost;
2958       entry_ptr->cost.latency = best_cost.latency;
2959     }
2960
2961   /* If we are getting a too long sequence for `struct algorithm'
2962      to record, make this search fail.  */
2963   if (best_alg->ops == MAX_BITS_PER_WORD)
2964     return;
2965
2966   /* Copy the algorithm from temporary space to the space at alg_out.
2967      We avoid using structure assignment because the majority of
2968      best_alg is normally undefined, and this is a critical function.  */
2969   alg_out->ops = best_alg->ops + 1;
2970   alg_out->cost = best_cost;
2971   memcpy (alg_out->op, best_alg->op,
2972           alg_out->ops * sizeof *alg_out->op);
2973   memcpy (alg_out->log, best_alg->log,
2974           alg_out->ops * sizeof *alg_out->log);
2975 }
2976 \f
2977 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2978    Try three variations:
2979
2980        - a shift/add sequence based on VAL itself
2981        - a shift/add sequence based on -VAL, followed by a negation
2982        - a shift/add sequence based on VAL - 1, followed by an addition.
2983
2984    Return true if the cheapest of these cost less than MULT_COST,
2985    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2986
2987 bool
2988 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2989                      struct algorithm *alg, enum mult_variant *variant,
2990                      int mult_cost)
2991 {
2992   struct algorithm alg2;
2993   struct mult_cost limit;
2994   int op_cost;
2995   bool speed = optimize_insn_for_speed_p ();
2996
2997   /* Fail quickly for impossible bounds.  */
2998   if (mult_cost < 0)
2999     return false;
3000
3001   /* Ensure that mult_cost provides a reasonable upper bound.
3002      Any constant multiplication can be performed with less
3003      than 2 * bits additions.  */
3004   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3005   if (mult_cost > op_cost)
3006     mult_cost = op_cost;
3007
3008   *variant = basic_variant;
3009   limit.cost = mult_cost;
3010   limit.latency = mult_cost;
3011   synth_mult (alg, val, &limit, mode);
3012
3013   /* This works only if the inverted value actually fits in an
3014      `unsigned int' */
3015   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3016     {
3017       op_cost = neg_cost (speed, mode);
3018       if (MULT_COST_LESS (&alg->cost, mult_cost))
3019         {
3020           limit.cost = alg->cost.cost - op_cost;
3021           limit.latency = alg->cost.latency - op_cost;
3022         }
3023       else
3024         {
3025           limit.cost = mult_cost - op_cost;
3026           limit.latency = mult_cost - op_cost;
3027         }
3028
3029       synth_mult (&alg2, -val, &limit, mode);
3030       alg2.cost.cost += op_cost;
3031       alg2.cost.latency += op_cost;
3032       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3033         *alg = alg2, *variant = negate_variant;
3034     }
3035
3036   /* This proves very useful for division-by-constant.  */
3037   op_cost = add_cost (speed, mode);
3038   if (MULT_COST_LESS (&alg->cost, mult_cost))
3039     {
3040       limit.cost = alg->cost.cost - op_cost;
3041       limit.latency = alg->cost.latency - op_cost;
3042     }
3043   else
3044     {
3045       limit.cost = mult_cost - op_cost;
3046       limit.latency = mult_cost - op_cost;
3047     }
3048
3049   synth_mult (&alg2, val - 1, &limit, mode);
3050   alg2.cost.cost += op_cost;
3051   alg2.cost.latency += op_cost;
3052   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3053     *alg = alg2, *variant = add_variant;
3054
3055   return MULT_COST_LESS (&alg->cost, mult_cost);
3056 }
3057
3058 /* A subroutine of expand_mult, used for constant multiplications.
3059    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3060    convenient.  Use the shift/add sequence described by ALG and apply
3061    the final fixup specified by VARIANT.  */
3062
3063 static rtx
3064 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3065                    rtx target, const struct algorithm *alg,
3066                    enum mult_variant variant)
3067 {
3068   unsigned HOST_WIDE_INT val_so_far;
3069   rtx_insn *insn;
3070   rtx accum, tem;
3071   int opno;
3072   machine_mode nmode;
3073
3074   /* Avoid referencing memory over and over and invalid sharing
3075      on SUBREGs.  */
3076   op0 = force_reg (mode, op0);
3077
3078   /* ACCUM starts out either as OP0 or as a zero, depending on
3079      the first operation.  */
3080
3081   if (alg->op[0] == alg_zero)
3082     {
3083       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3084       val_so_far = 0;
3085     }
3086   else if (alg->op[0] == alg_m)
3087     {
3088       accum = copy_to_mode_reg (mode, op0);
3089       val_so_far = 1;
3090     }
3091   else
3092     gcc_unreachable ();
3093
3094   for (opno = 1; opno < alg->ops; opno++)
3095     {
3096       int log = alg->log[opno];
3097       rtx shift_subtarget = optimize ? 0 : accum;
3098       rtx add_target
3099         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3100            && !optimize)
3101           ? target : 0;
3102       rtx accum_target = optimize ? 0 : accum;
3103       rtx accum_inner;
3104
3105       switch (alg->op[opno])
3106         {
3107         case alg_shift:
3108           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3109           /* REG_EQUAL note will be attached to the following insn.  */
3110           emit_move_insn (accum, tem);
3111           val_so_far <<= log;
3112           break;
3113
3114         case alg_add_t_m2:
3115           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3116           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3117                                  add_target ? add_target : accum_target);
3118           val_so_far += HOST_WIDE_INT_1U << log;
3119           break;
3120
3121         case alg_sub_t_m2:
3122           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3123           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3124                                  add_target ? add_target : accum_target);
3125           val_so_far -= HOST_WIDE_INT_1U << log;
3126           break;
3127
3128         case alg_add_t2_m:
3129           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3130                                 log, shift_subtarget, 0);
3131           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3132                                  add_target ? add_target : accum_target);
3133           val_so_far = (val_so_far << log) + 1;
3134           break;
3135
3136         case alg_sub_t2_m:
3137           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3138                                 log, shift_subtarget, 0);
3139           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3140                                  add_target ? add_target : accum_target);
3141           val_so_far = (val_so_far << log) - 1;
3142           break;
3143
3144         case alg_add_factor:
3145           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3146           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3147                                  add_target ? add_target : accum_target);
3148           val_so_far += val_so_far << log;
3149           break;
3150
3151         case alg_sub_factor:
3152           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3153           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3154                                  (add_target
3155                                   ? add_target : (optimize ? 0 : tem)));
3156           val_so_far = (val_so_far << log) - val_so_far;
3157           break;
3158
3159         default:
3160           gcc_unreachable ();
3161         }
3162
3163       if (SCALAR_INT_MODE_P (mode))
3164         {
3165           /* Write a REG_EQUAL note on the last insn so that we can cse
3166              multiplication sequences.  Note that if ACCUM is a SUBREG,
3167              we've set the inner register and must properly indicate that.  */
3168           tem = op0, nmode = mode;
3169           accum_inner = accum;
3170           if (GET_CODE (accum) == SUBREG)
3171             {
3172               accum_inner = SUBREG_REG (accum);
3173               nmode = GET_MODE (accum_inner);
3174               tem = gen_lowpart (nmode, op0);
3175             }
3176
3177           insn = get_last_insn ();
3178           set_dst_reg_note (insn, REG_EQUAL,
3179                             gen_rtx_MULT (nmode, tem,
3180                                           gen_int_mode (val_so_far, nmode)),
3181                             accum_inner);
3182         }
3183     }
3184
3185   if (variant == negate_variant)
3186     {
3187       val_so_far = -val_so_far;
3188       accum = expand_unop (mode, neg_optab, accum, target, 0);
3189     }
3190   else if (variant == add_variant)
3191     {
3192       val_so_far = val_so_far + 1;
3193       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3194     }
3195
3196   /* Compare only the bits of val and val_so_far that are significant
3197      in the result mode, to avoid sign-/zero-extension confusion.  */
3198   nmode = GET_MODE_INNER (mode);
3199   val &= GET_MODE_MASK (nmode);
3200   val_so_far &= GET_MODE_MASK (nmode);
3201   gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3202
3203   return accum;
3204 }
3205
3206 /* Perform a multiplication and return an rtx for the result.
3207    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3208    TARGET is a suggestion for where to store the result (an rtx).
3209
3210    We check specially for a constant integer as OP1.
3211    If you want this check for OP0 as well, then before calling
3212    you should swap the two operands if OP0 would be constant.  */
3213
3214 rtx
3215 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3216              int unsignedp)
3217 {
3218   enum mult_variant variant;
3219   struct algorithm algorithm;
3220   rtx scalar_op1;
3221   int max_cost;
3222   bool speed = optimize_insn_for_speed_p ();
3223   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3224
3225   if (CONSTANT_P (op0))
3226     std::swap (op0, op1);
3227
3228   /* For vectors, there are several simplifications that can be made if
3229      all elements of the vector constant are identical.  */
3230   scalar_op1 = unwrap_const_vec_duplicate (op1);
3231
3232   if (INTEGRAL_MODE_P (mode))
3233     {
3234       rtx fake_reg;
3235       HOST_WIDE_INT coeff;
3236       bool is_neg;
3237       int mode_bitsize;
3238
3239       if (op1 == CONST0_RTX (mode))
3240         return op1;
3241       if (op1 == CONST1_RTX (mode))
3242         return op0;
3243       if (op1 == CONSTM1_RTX (mode))
3244         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3245                             op0, target, 0);
3246
3247       if (do_trapv)
3248         goto skip_synth;
3249
3250       /* If mode is integer vector mode, check if the backend supports
3251          vector lshift (by scalar or vector) at all.  If not, we can't use
3252          synthetized multiply.  */
3253       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3254           && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3255           && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3256         goto skip_synth;
3257
3258       /* These are the operations that are potentially turned into
3259          a sequence of shifts and additions.  */
3260       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3261
3262       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3263          less than or equal in size to `unsigned int' this doesn't matter.
3264          If the mode is larger than `unsigned int', then synth_mult works
3265          only if the constant value exactly fits in an `unsigned int' without
3266          any truncation.  This means that multiplying by negative values does
3267          not work; results are off by 2^32 on a 32 bit machine.  */
3268       if (CONST_INT_P (scalar_op1))
3269         {
3270           coeff = INTVAL (scalar_op1);
3271           is_neg = coeff < 0;
3272         }
3273 #if TARGET_SUPPORTS_WIDE_INT
3274       else if (CONST_WIDE_INT_P (scalar_op1))
3275 #else
3276       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3277 #endif
3278         {
3279           int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3280           /* Perfect power of 2 (other than 1, which is handled above).  */
3281           if (shift > 0)
3282             return expand_shift (LSHIFT_EXPR, mode, op0,
3283                                  shift, target, unsignedp);
3284           else
3285             goto skip_synth;
3286         }
3287       else
3288         goto skip_synth;
3289
3290       /* We used to test optimize here, on the grounds that it's better to
3291          produce a smaller program when -O is not used.  But this causes
3292          such a terrible slowdown sometimes that it seems better to always
3293          use synth_mult.  */
3294
3295       /* Special case powers of two.  */
3296       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3297           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3298         return expand_shift (LSHIFT_EXPR, mode, op0,
3299                              floor_log2 (coeff), target, unsignedp);
3300
3301       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3302
3303       /* Attempt to handle multiplication of DImode values by negative
3304          coefficients, by performing the multiplication by a positive
3305          multiplier and then inverting the result.  */
3306       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3307         {
3308           /* Its safe to use -coeff even for INT_MIN, as the
3309              result is interpreted as an unsigned coefficient.
3310              Exclude cost of op0 from max_cost to match the cost
3311              calculation of the synth_mult.  */
3312           coeff = -(unsigned HOST_WIDE_INT) coeff;
3313           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3314                                     mode, speed)
3315                       - neg_cost (speed, mode));
3316           if (max_cost <= 0)
3317             goto skip_synth;
3318
3319           /* Special case powers of two.  */
3320           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3321             {
3322               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3323                                        floor_log2 (coeff), target, unsignedp);
3324               return expand_unop (mode, neg_optab, temp, target, 0);
3325             }
3326
3327           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3328                                    max_cost))
3329             {
3330               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3331                                             &algorithm, variant);
3332               return expand_unop (mode, neg_optab, temp, target, 0);
3333             }
3334           goto skip_synth;
3335         }
3336
3337       /* Exclude cost of op0 from max_cost to match the cost
3338          calculation of the synth_mult.  */
3339       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3340       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3341         return expand_mult_const (mode, op0, coeff, target,
3342                                   &algorithm, variant);
3343     }
3344  skip_synth:
3345
3346   /* Expand x*2.0 as x+x.  */
3347   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3348       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3349     {
3350       op0 = force_reg (GET_MODE (op0), op0);
3351       return expand_binop (mode, add_optab, op0, op0,
3352                            target, unsignedp, OPTAB_LIB_WIDEN);
3353     }
3354
3355   /* This used to use umul_optab if unsigned, but for non-widening multiply
3356      there is no difference between signed and unsigned.  */
3357   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3358                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3359   gcc_assert (op0);
3360   return op0;
3361 }
3362
3363 /* Return a cost estimate for multiplying a register by the given
3364    COEFFicient in the given MODE and SPEED.  */
3365
3366 int
3367 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3368 {
3369   int max_cost;
3370   struct algorithm algorithm;
3371   enum mult_variant variant;
3372
3373   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3374   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3375                            mode, speed);
3376   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3377     return algorithm.cost.cost;
3378   else
3379     return max_cost;
3380 }
3381
3382 /* Perform a widening multiplication and return an rtx for the result.
3383    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3384    TARGET is a suggestion for where to store the result (an rtx).
3385    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3386    or smul_widen_optab.
3387
3388    We check specially for a constant integer as OP1, comparing the
3389    cost of a widening multiply against the cost of a sequence of shifts
3390    and adds.  */
3391
3392 rtx
3393 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3394                       int unsignedp, optab this_optab)
3395 {
3396   bool speed = optimize_insn_for_speed_p ();
3397   rtx cop1;
3398
3399   if (CONST_INT_P (op1)
3400       && GET_MODE (op0) != VOIDmode
3401       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3402                                 this_optab == umul_widen_optab))
3403       && CONST_INT_P (cop1)
3404       && (INTVAL (cop1) >= 0
3405           || HWI_COMPUTABLE_MODE_P (mode)))
3406     {
3407       HOST_WIDE_INT coeff = INTVAL (cop1);
3408       int max_cost;
3409       enum mult_variant variant;
3410       struct algorithm algorithm;
3411
3412       if (coeff == 0)
3413         return CONST0_RTX (mode);
3414
3415       /* Special case powers of two.  */
3416       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3417         {
3418           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3419           return expand_shift (LSHIFT_EXPR, mode, op0,
3420                                floor_log2 (coeff), target, unsignedp);
3421         }
3422
3423       /* Exclude cost of op0 from max_cost to match the cost
3424          calculation of the synth_mult.  */
3425       max_cost = mul_widen_cost (speed, mode);
3426       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3427                                max_cost))
3428         {
3429           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3430           return expand_mult_const (mode, op0, coeff, target,
3431                                     &algorithm, variant);
3432         }
3433     }
3434   return expand_binop (mode, this_optab, op0, op1, target,
3435                        unsignedp, OPTAB_LIB_WIDEN);
3436 }
3437 \f
3438 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3439    replace division by D, and put the least significant N bits of the result
3440    in *MULTIPLIER_PTR and return the most significant bit.
3441
3442    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3443    needed precision is in PRECISION (should be <= N).
3444
3445    PRECISION should be as small as possible so this function can choose
3446    multiplier more freely.
3447
3448    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3449    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3450
3451    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3452    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3453
3454 unsigned HOST_WIDE_INT
3455 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3456                    unsigned HOST_WIDE_INT *multiplier_ptr,
3457                    int *post_shift_ptr, int *lgup_ptr)
3458 {
3459   int lgup, post_shift;
3460   int pow, pow2;
3461
3462   /* lgup = ceil(log2(divisor)); */
3463   lgup = ceil_log2 (d);
3464
3465   gcc_assert (lgup <= n);
3466
3467   pow = n + lgup;
3468   pow2 = n + lgup - precision;
3469
3470   /* mlow = 2^(N + lgup)/d */
3471   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3472   wide_int mlow = wi::udiv_trunc (val, d);
3473
3474   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3475   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3476   wide_int mhigh = wi::udiv_trunc (val, d);
3477
3478   /* If precision == N, then mlow, mhigh exceed 2^N
3479      (but they do not exceed 2^(N+1)).  */
3480
3481   /* Reduce to lowest terms.  */
3482   for (post_shift = lgup; post_shift > 0; post_shift--)
3483     {
3484       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3485                                                        HOST_BITS_PER_WIDE_INT);
3486       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3487                                                        HOST_BITS_PER_WIDE_INT);
3488       if (ml_lo >= mh_lo)
3489         break;
3490
3491       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3492       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3493     }
3494
3495   *post_shift_ptr = post_shift;
3496   *lgup_ptr = lgup;
3497   if (n < HOST_BITS_PER_WIDE_INT)
3498     {
3499       unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3500       *multiplier_ptr = mhigh.to_uhwi () & mask;
3501       return mhigh.to_uhwi () >= mask;
3502     }
3503   else
3504     {
3505       *multiplier_ptr = mhigh.to_uhwi ();
3506       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3507     }
3508 }
3509
3510 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3511    congruent to 1 (mod 2**N).  */
3512
3513 static unsigned HOST_WIDE_INT
3514 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3515 {
3516   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3517
3518   /* The algorithm notes that the choice y = x satisfies
3519      x*y == 1 mod 2^3, since x is assumed odd.
3520      Each iteration doubles the number of bits of significance in y.  */
3521
3522   unsigned HOST_WIDE_INT mask;
3523   unsigned HOST_WIDE_INT y = x;
3524   int nbit = 3;
3525
3526   mask = (n == HOST_BITS_PER_WIDE_INT
3527           ? HOST_WIDE_INT_M1U
3528           : (HOST_WIDE_INT_1U << n) - 1);
3529
3530   while (nbit < n)
3531     {
3532       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3533       nbit *= 2;
3534     }
3535   return y;
3536 }
3537
3538 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3539    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3540    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3541    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3542    become signed.
3543
3544    The result is put in TARGET if that is convenient.
3545
3546    MODE is the mode of operation.  */
3547
3548 rtx
3549 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3550                              rtx op1, rtx target, int unsignedp)
3551 {
3552   rtx tem;
3553   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3554
3555   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3556                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3557   tem = expand_and (mode, tem, op1, NULL_RTX);
3558   adj_operand
3559     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3560                      adj_operand);
3561
3562   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3563                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3564   tem = expand_and (mode, tem, op0, NULL_RTX);
3565   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3566                           target);
3567
3568   return target;
3569 }
3570
3571 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3572
3573 static rtx
3574 extract_high_half (machine_mode mode, rtx op)
3575 {
3576   machine_mode wider_mode;
3577
3578   if (mode == word_mode)
3579     return gen_highpart (mode, op);
3580
3581   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3582
3583   wider_mode = GET_MODE_WIDER_MODE (mode);
3584   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3585                      GET_MODE_BITSIZE (mode), 0, 1);
3586   return convert_modes (mode, wider_mode, op, 0);
3587 }
3588
3589 /* Like expmed_mult_highpart, but only consider using a multiplication
3590    optab.  OP1 is an rtx for the constant operand.  */
3591
3592 static rtx
3593 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3594                             rtx target, int unsignedp, int max_cost)
3595 {
3596   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3597   machine_mode wider_mode;
3598   optab moptab;
3599   rtx tem;
3600   int size;
3601   bool speed = optimize_insn_for_speed_p ();
3602
3603   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3604
3605   wider_mode = GET_MODE_WIDER_MODE (mode);
3606   size = GET_MODE_BITSIZE (mode);
3607
3608   /* Firstly, try using a multiplication insn that only generates the needed
3609      high part of the product, and in the sign flavor of unsignedp.  */
3610   if (mul_highpart_cost (speed, mode) < max_cost)
3611     {
3612       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3613       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3614                           unsignedp, OPTAB_DIRECT);
3615       if (tem)
3616         return tem;
3617     }
3618
3619   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3620      Need to adjust the result after the multiplication.  */
3621   if (size - 1 < BITS_PER_WORD
3622       && (mul_highpart_cost (speed, mode)
3623           + 2 * shift_cost (speed, mode, size-1)
3624           + 4 * add_cost (speed, mode) < max_cost))
3625     {
3626       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3627       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3628                           unsignedp, OPTAB_DIRECT);
3629       if (tem)
3630         /* We used the wrong signedness.  Adjust the result.  */
3631         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3632                                             tem, unsignedp);
3633     }
3634
3635   /* Try widening multiplication.  */
3636   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3637   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3638       && mul_widen_cost (speed, wider_mode) < max_cost)
3639     {
3640       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3641                           unsignedp, OPTAB_WIDEN);
3642       if (tem)
3643         return extract_high_half (mode, tem);
3644     }
3645
3646   /* Try widening the mode and perform a non-widening multiplication.  */
3647   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3648       && size - 1 < BITS_PER_WORD
3649       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3650           < max_cost))
3651     {
3652       rtx_insn *insns;
3653       rtx wop0, wop1;
3654
3655       /* We need to widen the operands, for example to ensure the
3656          constant multiplier is correctly sign or zero extended.
3657          Use a sequence to clean-up any instructions emitted by
3658          the conversions if things don't work out.  */
3659       start_sequence ();
3660       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3661       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3662       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3663                           unsignedp, OPTAB_WIDEN);
3664       insns = get_insns ();
3665       end_sequence ();
3666
3667       if (tem)
3668         {
3669           emit_insn (insns);
3670           return extract_high_half (mode, tem);
3671         }
3672     }
3673
3674   /* Try widening multiplication of opposite signedness, and adjust.  */
3675   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3676   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3677       && size - 1 < BITS_PER_WORD
3678       && (mul_widen_cost (speed, wider_mode)
3679           + 2 * shift_cost (speed, mode, size-1)
3680           + 4 * add_cost (speed, mode) < max_cost))
3681     {
3682       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3683                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3684       if (tem != 0)
3685         {
3686           tem = extract_high_half (mode, tem);
3687           /* We used the wrong signedness.  Adjust the result.  */
3688           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3689                                               target, unsignedp);
3690         }
3691     }
3692
3693   return 0;
3694 }
3695
3696 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3697    putting the high half of the result in TARGET if that is convenient,
3698    and return where the result is.  If the operation can not be performed,
3699    0 is returned.
3700
3701    MODE is the mode of operation and result.
3702
3703    UNSIGNEDP nonzero means unsigned multiply.
3704
3705    MAX_COST is the total allowed cost for the expanded RTL.  */
3706
3707 static rtx
3708 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3709                       rtx target, int unsignedp, int max_cost)
3710 {
3711   machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3712   unsigned HOST_WIDE_INT cnst1;
3713   int extra_cost;
3714   bool sign_adjust = false;
3715   enum mult_variant variant;
3716   struct algorithm alg;
3717   rtx tem;
3718   bool speed = optimize_insn_for_speed_p ();
3719
3720   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3721   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3722   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3723
3724   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3725
3726   /* We can't optimize modes wider than BITS_PER_WORD.
3727      ??? We might be able to perform double-word arithmetic if
3728      mode == word_mode, however all the cost calculations in
3729      synth_mult etc. assume single-word operations.  */
3730   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3731     return expmed_mult_highpart_optab (mode, op0, op1, target,
3732                                        unsignedp, max_cost);
3733
3734   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3735
3736   /* Check whether we try to multiply by a negative constant.  */
3737   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3738     {
3739       sign_adjust = true;
3740       extra_cost += add_cost (speed, mode);
3741     }
3742
3743   /* See whether shift/add multiplication is cheap enough.  */
3744   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3745                            max_cost - extra_cost))
3746     {
3747       /* See whether the specialized multiplication optabs are
3748          cheaper than the shift/add version.  */
3749       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3750                                         alg.cost.cost + extra_cost);
3751       if (tem)
3752         return tem;
3753
3754       tem = convert_to_mode (wider_mode, op0, unsignedp);
3755       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3756       tem = extract_high_half (mode, tem);
3757
3758       /* Adjust result for signedness.  */
3759       if (sign_adjust)
3760         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3761
3762       return tem;
3763     }
3764   return expmed_mult_highpart_optab (mode, op0, op1, target,
3765                                      unsignedp, max_cost);
3766 }
3767
3768
3769 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3770
3771 static rtx
3772 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3773 {
3774   rtx result, temp, shift;
3775   rtx_code_label *label;
3776   int logd;
3777   int prec = GET_MODE_PRECISION (mode);
3778
3779   logd = floor_log2 (d);
3780   result = gen_reg_rtx (mode);
3781
3782   /* Avoid conditional branches when they're expensive.  */
3783   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3784       && optimize_insn_for_speed_p ())
3785     {
3786       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3787                                       mode, 0, -1);
3788       if (signmask)
3789         {
3790           HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3791           signmask = force_reg (mode, signmask);
3792           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3793
3794           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3795              which instruction sequence to use.  If logical right shifts
3796              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3797              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3798
3799           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3800           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3801               || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3802                   > COSTS_N_INSNS (2)))
3803             {
3804               temp = expand_binop (mode, xor_optab, op0, signmask,
3805                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3806               temp = expand_binop (mode, sub_optab, temp, signmask,
3807                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3808               temp = expand_binop (mode, and_optab, temp,
3809                                    gen_int_mode (masklow, mode),
3810                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3811               temp = expand_binop (mode, xor_optab, temp, signmask,
3812                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3813               temp = expand_binop (mode, sub_optab, temp, signmask,
3814                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3815             }
3816           else
3817             {
3818               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3819                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3820               signmask = force_reg (mode, signmask);
3821
3822               temp = expand_binop (mode, add_optab, op0, signmask,
3823                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3824               temp = expand_binop (mode, and_optab, temp,
3825                                    gen_int_mode (masklow, mode),
3826                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3827               temp = expand_binop (mode, sub_optab, temp, signmask,
3828                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3829             }
3830           return temp;
3831         }
3832     }
3833
3834   /* Mask contains the mode's signbit and the significant bits of the
3835      modulus.  By including the signbit in the operation, many targets
3836      can avoid an explicit compare operation in the following comparison
3837      against zero.  */
3838   wide_int mask = wi::mask (logd, false, prec);
3839   mask = wi::set_bit (mask, prec - 1);
3840
3841   temp = expand_binop (mode, and_optab, op0,
3842                        immed_wide_int_const (mask, mode),
3843                        result, 1, OPTAB_LIB_WIDEN);
3844   if (temp != result)
3845     emit_move_insn (result, temp);
3846
3847   label = gen_label_rtx ();
3848   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3849
3850   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3851                        0, OPTAB_LIB_WIDEN);
3852
3853   mask = wi::mask (logd, true, prec);
3854   temp = expand_binop (mode, ior_optab, temp,
3855                        immed_wide_int_const (mask, mode),
3856                        result, 1, OPTAB_LIB_WIDEN);
3857   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3858                        0, OPTAB_LIB_WIDEN);
3859   if (temp != result)
3860     emit_move_insn (result, temp);
3861   emit_label (label);
3862   return result;
3863 }
3864
3865 /* Expand signed division of OP0 by a power of two D in mode MODE.
3866    This routine is only called for positive values of D.  */
3867
3868 static rtx
3869 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3870 {
3871   rtx temp;
3872   rtx_code_label *label;
3873   int logd;
3874
3875   logd = floor_log2 (d);
3876
3877   if (d == 2
3878       && BRANCH_COST (optimize_insn_for_speed_p (),
3879                       false) >= 1)
3880     {
3881       temp = gen_reg_rtx (mode);
3882       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3883       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3884                            0, OPTAB_LIB_WIDEN);
3885       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3886     }
3887
3888   if (HAVE_conditional_move
3889       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3890     {
3891       rtx temp2;
3892
3893       start_sequence ();
3894       temp2 = copy_to_mode_reg (mode, op0);
3895       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3896                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3897       temp = force_reg (mode, temp);
3898
3899       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3900       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3901                                      mode, temp, temp2, mode, 0);
3902       if (temp2)
3903         {
3904           rtx_insn *seq = get_insns ();
3905           end_sequence ();
3906           emit_insn (seq);
3907           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3908         }
3909       end_sequence ();
3910     }
3911
3912   if (BRANCH_COST (optimize_insn_for_speed_p (),
3913                    false) >= 2)
3914     {
3915       int ushift = GET_MODE_BITSIZE (mode) - logd;
3916
3917       temp = gen_reg_rtx (mode);
3918       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3919       if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3920           || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3921              > COSTS_N_INSNS (1))
3922         temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3923                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3924       else
3925         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3926                              ushift, NULL_RTX, 1);
3927       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3928                            0, OPTAB_LIB_WIDEN);
3929       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3930     }
3931
3932   label = gen_label_rtx ();
3933   temp = copy_to_mode_reg (mode, op0);
3934   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3935   expand_inc (temp, gen_int_mode (d - 1, mode));
3936   emit_label (label);
3937   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3938 }
3939 \f
3940 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3941    if that is convenient, and returning where the result is.
3942    You may request either the quotient or the remainder as the result;
3943    specify REM_FLAG nonzero to get the remainder.
3944
3945    CODE is the expression code for which kind of division this is;
3946    it controls how rounding is done.  MODE is the machine mode to use.
3947    UNSIGNEDP nonzero means do unsigned division.  */
3948
3949 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3950    and then correct it by or'ing in missing high bits
3951    if result of ANDI is nonzero.
3952    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3953    This could optimize to a bfexts instruction.
3954    But C doesn't use these operations, so their optimizations are
3955    left for later.  */
3956 /* ??? For modulo, we don't actually need the highpart of the first product,
3957    the low part will do nicely.  And for small divisors, the second multiply
3958    can also be a low-part only multiply or even be completely left out.
3959    E.g. to calculate the remainder of a division by 3 with a 32 bit
3960    multiply, multiply with 0x55555556 and extract the upper two bits;
3961    the result is exact for inputs up to 0x1fffffff.
3962    The input range can be reduced by using cross-sum rules.
3963    For odd divisors >= 3, the following table gives right shift counts
3964    so that if a number is shifted by an integer multiple of the given
3965    amount, the remainder stays the same:
3966    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3967    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3968    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3969    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3970    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3971
3972    Cross-sum rules for even numbers can be derived by leaving as many bits
3973    to the right alone as the divisor has zeros to the right.
3974    E.g. if x is an unsigned 32 bit number:
3975    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3976    */
3977
3978 rtx
3979 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3980                rtx op0, rtx op1, rtx target, int unsignedp)
3981 {
3982   machine_mode compute_mode;
3983   rtx tquotient;
3984   rtx quotient = 0, remainder = 0;
3985   rtx_insn *last;
3986   int size;
3987   rtx_insn *insn;
3988   optab optab1, optab2;
3989   int op1_is_constant, op1_is_pow2 = 0;
3990   int max_cost, extra_cost;
3991   static HOST_WIDE_INT last_div_const = 0;
3992   bool speed = optimize_insn_for_speed_p ();
3993
3994   op1_is_constant = CONST_INT_P (op1);
3995   if (op1_is_constant)
3996     {
3997       unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3998       if (unsignedp)
3999         ext_op1 &= GET_MODE_MASK (mode);
4000       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
4001                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
4002     }
4003
4004   /*
4005      This is the structure of expand_divmod:
4006
4007      First comes code to fix up the operands so we can perform the operations
4008      correctly and efficiently.
4009
4010      Second comes a switch statement with code specific for each rounding mode.
4011      For some special operands this code emits all RTL for the desired
4012      operation, for other cases, it generates only a quotient and stores it in
4013      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4014      to indicate that it has not done anything.
4015
4016      Last comes code that finishes the operation.  If QUOTIENT is set and
4017      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4018      QUOTIENT is not set, it is computed using trunc rounding.
4019
4020      We try to generate special code for division and remainder when OP1 is a
4021      constant.  If |OP1| = 2**n we can use shifts and some other fast
4022      operations.  For other values of OP1, we compute a carefully selected
4023      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4024      by m.
4025
4026      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4027      half of the product.  Different strategies for generating the product are
4028      implemented in expmed_mult_highpart.
4029
4030      If what we actually want is the remainder, we generate that by another
4031      by-constant multiplication and a subtraction.  */
4032
4033   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4034      code below will malfunction if we are, so check here and handle
4035      the special case if so.  */
4036   if (op1 == const1_rtx)
4037     return rem_flag ? const0_rtx : op0;
4038
4039     /* When dividing by -1, we could get an overflow.
4040      negv_optab can handle overflows.  */
4041   if (! unsignedp && op1 == constm1_rtx)
4042     {
4043       if (rem_flag)
4044         return const0_rtx;
4045       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4046                           ? negv_optab : neg_optab, op0, target, 0);
4047     }
4048
4049   if (target
4050       /* Don't use the function value register as a target
4051          since we have to read it as well as write it,
4052          and function-inlining gets confused by this.  */
4053       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4054           /* Don't clobber an operand while doing a multi-step calculation.  */
4055           || ((rem_flag || op1_is_constant)
4056               && (reg_mentioned_p (target, op0)
4057                   || (MEM_P (op0) && MEM_P (target))))
4058           || reg_mentioned_p (target, op1)
4059           || (MEM_P (op1) && MEM_P (target))))
4060     target = 0;
4061
4062   /* Get the mode in which to perform this computation.  Normally it will
4063      be MODE, but sometimes we can't do the desired operation in MODE.
4064      If so, pick a wider mode in which we can do the operation.  Convert
4065      to that mode at the start to avoid repeated conversions.
4066
4067      First see what operations we need.  These depend on the expression
4068      we are evaluating.  (We assume that divxx3 insns exist under the
4069      same conditions that modxx3 insns and that these insns don't normally
4070      fail.  If these assumptions are not correct, we may generate less
4071      efficient code in some cases.)
4072
4073      Then see if we find a mode in which we can open-code that operation
4074      (either a division, modulus, or shift).  Finally, check for the smallest
4075      mode for which we can do the operation with a library call.  */
4076
4077   /* We might want to refine this now that we have division-by-constant
4078      optimization.  Since expmed_mult_highpart tries so many variants, it is
4079      not straightforward to generalize this.  Maybe we should make an array
4080      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4081
4082   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
4083             ? (unsignedp ? lshr_optab : ashr_optab)
4084             : (unsignedp ? udiv_optab : sdiv_optab));
4085   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
4086             ? optab1
4087             : (unsignedp ? udivmod_optab : sdivmod_optab));
4088
4089   for (compute_mode = mode; compute_mode != VOIDmode;
4090        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4091     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4092         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4093       break;
4094
4095   if (compute_mode == VOIDmode)
4096     for (compute_mode = mode; compute_mode != VOIDmode;
4097          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4098       if (optab_libfunc (optab1, compute_mode)
4099           || optab_libfunc (optab2, compute_mode))
4100         break;
4101
4102   /* If we still couldn't find a mode, use MODE, but expand_binop will
4103      probably die.  */
4104   if (compute_mode == VOIDmode)
4105     compute_mode = mode;
4106
4107   if (target && GET_MODE (target) == compute_mode)
4108     tquotient = target;
4109   else
4110     tquotient = gen_reg_rtx (compute_mode);
4111
4112   size = GET_MODE_BITSIZE (compute_mode);
4113 #if 0
4114   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4115      (mode), and thereby get better code when OP1 is a constant.  Do that
4116      later.  It will require going over all usages of SIZE below.  */
4117   size = GET_MODE_BITSIZE (mode);
4118 #endif
4119
4120   /* Only deduct something for a REM if the last divide done was
4121      for a different constant.   Then set the constant of the last
4122      divide.  */
4123   max_cost = (unsignedp
4124               ? udiv_cost (speed, compute_mode)
4125               : sdiv_cost (speed, compute_mode));
4126   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4127                      && INTVAL (op1) == last_div_const))
4128     max_cost -= (mul_cost (speed, compute_mode)
4129                  + add_cost (speed, compute_mode));
4130
4131   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4132
4133   /* Now convert to the best mode to use.  */
4134   if (compute_mode != mode)
4135     {
4136       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4137       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4138
4139       /* convert_modes may have placed op1 into a register, so we
4140          must recompute the following.  */
4141       op1_is_constant = CONST_INT_P (op1);
4142       op1_is_pow2 = (op1_is_constant
4143                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4144                           || (! unsignedp
4145                               && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4146     }
4147
4148   /* If one of the operands is a volatile MEM, copy it into a register.  */
4149
4150   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4151     op0 = force_reg (compute_mode, op0);
4152   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4153     op1 = force_reg (compute_mode, op1);
4154
4155   /* If we need the remainder or if OP1 is constant, we need to
4156      put OP0 in a register in case it has any queued subexpressions.  */
4157   if (rem_flag || op1_is_constant)
4158     op0 = force_reg (compute_mode, op0);
4159
4160   last = get_last_insn ();
4161
4162   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4163   if (unsignedp)
4164     {
4165       if (code == FLOOR_DIV_EXPR)
4166         code = TRUNC_DIV_EXPR;
4167       if (code == FLOOR_MOD_EXPR)
4168         code = TRUNC_MOD_EXPR;
4169       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4170         code = TRUNC_DIV_EXPR;
4171     }
4172
4173   if (op1 != const0_rtx)
4174     switch (code)
4175       {
4176       case TRUNC_MOD_EXPR:
4177       case TRUNC_DIV_EXPR:
4178         if (op1_is_constant)
4179           {
4180             if (unsignedp)
4181               {
4182                 unsigned HOST_WIDE_INT mh, ml;
4183                 int pre_shift, post_shift;
4184                 int dummy;
4185                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4186                                             & GET_MODE_MASK (compute_mode));
4187
4188                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4189                   {
4190                     pre_shift = floor_log2 (d);
4191                     if (rem_flag)
4192                       {
4193                         unsigned HOST_WIDE_INT mask
4194                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4195                         remainder
4196                           = expand_binop (compute_mode, and_optab, op0,
4197                                           gen_int_mode (mask, compute_mode),
4198                                           remainder, 1,
4199                                           OPTAB_LIB_WIDEN);
4200                         if (remainder)
4201                           return gen_lowpart (mode, remainder);
4202                       }
4203                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4204                                              pre_shift, tquotient, 1);
4205                   }
4206                 else if (size <= HOST_BITS_PER_WIDE_INT)
4207                   {
4208                     if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4209                       {
4210                         /* Most significant bit of divisor is set; emit an scc
4211                            insn.  */
4212                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4213                                                           compute_mode, 1, 1);
4214                       }
4215                     else
4216                       {
4217                         /* Find a suitable multiplier and right shift count
4218                            instead of multiplying with D.  */
4219
4220                         mh = choose_multiplier (d, size, size,
4221                                                 &ml, &post_shift, &dummy);
4222
4223                         /* If the suggested multiplier is more than SIZE bits,
4224                            we can do better for even divisors, using an
4225                            initial right shift.  */
4226                         if (mh != 0 && (d & 1) == 0)
4227                           {
4228                             pre_shift = ctz_or_zero (d);
4229                             mh = choose_multiplier (d >> pre_shift, size,
4230                                                     size - pre_shift,
4231                                                     &ml, &post_shift, &dummy);
4232                             gcc_assert (!mh);
4233                           }
4234                         else
4235                           pre_shift = 0;
4236
4237                         if (mh != 0)
4238                           {
4239                             rtx t1, t2, t3, t4;
4240
4241                             if (post_shift - 1 >= BITS_PER_WORD)
4242                               goto fail1;
4243
4244                             extra_cost
4245                               = (shift_cost (speed, compute_mode, post_shift - 1)
4246                                  + shift_cost (speed, compute_mode, 1)
4247                                  + 2 * add_cost (speed, compute_mode));
4248                             t1 = expmed_mult_highpart
4249                               (compute_mode, op0,
4250                                gen_int_mode (ml, compute_mode),
4251                                NULL_RTX, 1, max_cost - extra_cost);
4252                             if (t1 == 0)
4253                               goto fail1;
4254                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4255                                                                op0, t1),
4256                                                 NULL_RTX);
4257                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4258                                                t2, 1, NULL_RTX, 1);
4259                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4260                                                               t1, t3),
4261                                                 NULL_RTX);
4262                             quotient = expand_shift
4263                               (RSHIFT_EXPR, compute_mode, t4,
4264                                post_shift - 1, tquotient, 1);
4265                           }
4266                         else
4267                           {
4268                             rtx t1, t2;
4269
4270                             if (pre_shift >= BITS_PER_WORD
4271                                 || post_shift >= BITS_PER_WORD)
4272                               goto fail1;
4273
4274                             t1 = expand_shift
4275                               (RSHIFT_EXPR, compute_mode, op0,
4276                                pre_shift, NULL_RTX, 1);
4277                             extra_cost
4278                               = (shift_cost (speed, compute_mode, pre_shift)
4279                                  + shift_cost (speed, compute_mode, post_shift));
4280                             t2 = expmed_mult_highpart
4281                               (compute_mode, t1,
4282                                gen_int_mode (ml, compute_mode),
4283                                NULL_RTX, 1, max_cost - extra_cost);
4284                             if (t2 == 0)
4285                               goto fail1;
4286                             quotient = expand_shift
4287                               (RSHIFT_EXPR, compute_mode, t2,
4288                                post_shift, tquotient, 1);
4289                           }
4290                       }
4291                   }
4292                 else            /* Too wide mode to use tricky code */
4293                   break;
4294
4295                 insn = get_last_insn ();
4296                 if (insn != last)
4297                   set_dst_reg_note (insn, REG_EQUAL,
4298                                     gen_rtx_UDIV (compute_mode, op0, op1),
4299                                     quotient);
4300               }
4301             else                /* TRUNC_DIV, signed */
4302               {
4303                 unsigned HOST_WIDE_INT ml;
4304                 int lgup, post_shift;
4305                 rtx mlr;
4306                 HOST_WIDE_INT d = INTVAL (op1);
4307                 unsigned HOST_WIDE_INT abs_d;
4308
4309                 /* Since d might be INT_MIN, we have to cast to
4310                    unsigned HOST_WIDE_INT before negating to avoid
4311                    undefined signed overflow.  */
4312                 abs_d = (d >= 0
4313                          ? (unsigned HOST_WIDE_INT) d
4314                          : - (unsigned HOST_WIDE_INT) d);
4315
4316                 /* n rem d = n rem -d */
4317                 if (rem_flag && d < 0)
4318                   {
4319                     d = abs_d;
4320                     op1 = gen_int_mode (abs_d, compute_mode);
4321                   }
4322
4323                 if (d == 1)
4324                   quotient = op0;
4325                 else if (d == -1)
4326                   quotient = expand_unop (compute_mode, neg_optab, op0,
4327                                           tquotient, 0);
4328                 else if (HOST_BITS_PER_WIDE_INT >= size
4329                          && abs_d == HOST_WIDE_INT_1U << (size - 1))
4330                   {
4331                     /* This case is not handled correctly below.  */
4332                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4333                                                 compute_mode, 1, 1);
4334                     if (quotient == 0)
4335                       goto fail1;
4336                   }
4337                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4338                          && (rem_flag
4339                              ? smod_pow2_cheap (speed, compute_mode)
4340                              : sdiv_pow2_cheap (speed, compute_mode))
4341                          /* We assume that cheap metric is true if the
4342                             optab has an expander for this mode.  */
4343                          && ((optab_handler ((rem_flag ? smod_optab
4344                                               : sdiv_optab),
4345                                              compute_mode)
4346                               != CODE_FOR_nothing)
4347                              || (optab_handler (sdivmod_optab,
4348                                                 compute_mode)
4349                                  != CODE_FOR_nothing)))
4350                   ;
4351                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4352                   {
4353                     if (rem_flag)
4354                       {
4355                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4356                         if (remainder)
4357                           return gen_lowpart (mode, remainder);
4358                       }
4359
4360                     if (sdiv_pow2_cheap (speed, compute_mode)
4361                         && ((optab_handler (sdiv_optab, compute_mode)
4362                              != CODE_FOR_nothing)
4363                             || (optab_handler (sdivmod_optab, compute_mode)
4364                                 != CODE_FOR_nothing)))
4365                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4366                                                 compute_mode, op0,
4367                                                 gen_int_mode (abs_d,
4368                                                               compute_mode),
4369                                                 NULL_RTX, 0);
4370                     else
4371                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4372
4373                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4374                        negate the quotient.  */
4375                     if (d < 0)
4376                       {
4377                         insn = get_last_insn ();
4378                         if (insn != last
4379                             && abs_d < (HOST_WIDE_INT_1U
4380                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4381                           set_dst_reg_note (insn, REG_EQUAL,
4382                                             gen_rtx_DIV (compute_mode, op0,
4383                                                          gen_int_mode
4384                                                            (abs_d,
4385                                                             compute_mode)),
4386                                             quotient);
4387
4388                         quotient = expand_unop (compute_mode, neg_optab,
4389                                                 quotient, quotient, 0);
4390                       }
4391                   }
4392                 else if (size <= HOST_BITS_PER_WIDE_INT)
4393                   {
4394                     choose_multiplier (abs_d, size, size - 1,
4395                                        &ml, &post_shift, &lgup);
4396                     if (ml < HOST_WIDE_INT_1U << (size - 1))
4397                       {
4398                         rtx t1, t2, t3;
4399
4400                         if (post_shift >= BITS_PER_WORD
4401                             || size - 1 >= BITS_PER_WORD)
4402                           goto fail1;
4403
4404                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4405                                       + shift_cost (speed, compute_mode, size - 1)
4406                                       + add_cost (speed, compute_mode));
4407                         t1 = expmed_mult_highpart
4408                           (compute_mode, op0, gen_int_mode (ml, compute_mode),
4409                            NULL_RTX, 0, max_cost - extra_cost);
4410                         if (t1 == 0)
4411                           goto fail1;
4412                         t2 = expand_shift
4413                           (RSHIFT_EXPR, compute_mode, t1,
4414                            post_shift, NULL_RTX, 0);
4415                         t3 = expand_shift
4416                           (RSHIFT_EXPR, compute_mode, op0,
4417                            size - 1, NULL_RTX, 0);
4418                         if (d < 0)
4419                           quotient
4420                             = force_operand (gen_rtx_MINUS (compute_mode,
4421                                                             t3, t2),
4422                                              tquotient);
4423                         else
4424                           quotient
4425                             = force_operand (gen_rtx_MINUS (compute_mode,
4426                                                             t2, t3),
4427                                              tquotient);
4428                       }
4429                     else
4430                       {
4431                         rtx t1, t2, t3, t4;
4432
4433                         if (post_shift >= BITS_PER_WORD
4434                             || size - 1 >= BITS_PER_WORD)
4435                           goto fail1;
4436
4437                         ml |= HOST_WIDE_INT_M1U << (size - 1);
4438                         mlr = gen_int_mode (ml, compute_mode);
4439                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4440                                       + shift_cost (speed, compute_mode, size - 1)
4441                                       + 2 * add_cost (speed, compute_mode));
4442                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4443                                                    NULL_RTX, 0,
4444                                                    max_cost - extra_cost);
4445                         if (t1 == 0)
4446                           goto fail1;
4447                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4448                                                           t1, op0),
4449                                             NULL_RTX);
4450                         t3 = expand_shift
4451                           (RSHIFT_EXPR, compute_mode, t2,
4452                            post_shift, NULL_RTX, 0);
4453                         t4 = expand_shift
4454                           (RSHIFT_EXPR, compute_mode, op0,
4455                            size - 1, NULL_RTX, 0);
4456                         if (d < 0)
4457                           quotient
4458                             = force_operand (gen_rtx_MINUS (compute_mode,
4459                                                             t4, t3),
4460                                              tquotient);
4461                         else
4462                           quotient
4463                             = force_operand (gen_rtx_MINUS (compute_mode,
4464                                                             t3, t4),
4465                                              tquotient);
4466                       }
4467                   }
4468                 else            /* Too wide mode to use tricky code */
4469                   break;
4470
4471                 insn = get_last_insn ();
4472                 if (insn != last)
4473                   set_dst_reg_note (insn, REG_EQUAL,
4474                                     gen_rtx_DIV (compute_mode, op0, op1),
4475                                     quotient);
4476               }
4477             break;
4478           }
4479       fail1:
4480         delete_insns_since (last);
4481         break;
4482
4483       case FLOOR_DIV_EXPR:
4484       case FLOOR_MOD_EXPR:
4485       /* We will come here only for signed operations.  */
4486         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4487           {
4488             unsigned HOST_WIDE_INT mh, ml;
4489             int pre_shift, lgup, post_shift;
4490             HOST_WIDE_INT d = INTVAL (op1);
4491
4492             if (d > 0)
4493               {
4494                 /* We could just as easily deal with negative constants here,
4495                    but it does not seem worth the trouble for GCC 2.6.  */
4496                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4497                   {
4498                     pre_shift = floor_log2 (d);
4499                     if (rem_flag)
4500                       {
4501                         unsigned HOST_WIDE_INT mask
4502                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4503                         remainder = expand_binop
4504                           (compute_mode, and_optab, op0,
4505                            gen_int_mode (mask, compute_mode),
4506                            remainder, 0, OPTAB_LIB_WIDEN);
4507                         if (remainder)
4508                           return gen_lowpart (mode, remainder);
4509                       }
4510                     quotient = expand_shift
4511                       (RSHIFT_EXPR, compute_mode, op0,
4512                        pre_shift, tquotient, 0);
4513                   }
4514                 else
4515                   {
4516                     rtx t1, t2, t3, t4;
4517
4518                     mh = choose_multiplier (d, size, size - 1,
4519                                             &ml, &post_shift, &lgup);
4520                     gcc_assert (!mh);
4521
4522                     if (post_shift < BITS_PER_WORD
4523                         && size - 1 < BITS_PER_WORD)
4524                       {
4525                         t1 = expand_shift
4526                           (RSHIFT_EXPR, compute_mode, op0,
4527                            size - 1, NULL_RTX, 0);
4528                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4529                                            NULL_RTX, 0, OPTAB_WIDEN);
4530                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4531                                       + shift_cost (speed, compute_mode, size - 1)
4532                                       + 2 * add_cost (speed, compute_mode));
4533                         t3 = expmed_mult_highpart
4534                           (compute_mode, t2, gen_int_mode (ml, compute_mode),
4535                            NULL_RTX, 1, max_cost - extra_cost);
4536                         if (t3 != 0)
4537                           {
4538                             t4 = expand_shift
4539                               (RSHIFT_EXPR, compute_mode, t3,
4540                                post_shift, NULL_RTX, 1);
4541                             quotient = expand_binop (compute_mode, xor_optab,
4542                                                      t4, t1, tquotient, 0,
4543                                                      OPTAB_WIDEN);
4544                           }
4545                       }
4546                   }
4547               }
4548             else
4549               {
4550                 rtx nsign, t1, t2, t3, t4;
4551                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4552                                                   op0, constm1_rtx), NULL_RTX);
4553                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4554                                    0, OPTAB_WIDEN);
4555                 nsign = expand_shift
4556                   (RSHIFT_EXPR, compute_mode, t2,
4557                    size - 1, NULL_RTX, 0);
4558                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4559                                     NULL_RTX);
4560                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4561                                     NULL_RTX, 0);
4562                 if (t4)
4563                   {
4564                     rtx t5;
4565                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4566                                       NULL_RTX, 0);
4567                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4568                                                             t4, t5),
4569                                               tquotient);
4570                   }
4571               }
4572           }
4573
4574         if (quotient != 0)
4575           break;
4576         delete_insns_since (last);
4577
4578         /* Try using an instruction that produces both the quotient and
4579            remainder, using truncation.  We can easily compensate the quotient
4580            or remainder to get floor rounding, once we have the remainder.
4581            Notice that we compute also the final remainder value here,
4582            and return the result right away.  */
4583         if (target == 0 || GET_MODE (target) != compute_mode)
4584           target = gen_reg_rtx (compute_mode);
4585
4586         if (rem_flag)
4587           {
4588             remainder
4589               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4590             quotient = gen_reg_rtx (compute_mode);
4591           }
4592         else
4593           {
4594             quotient
4595               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4596             remainder = gen_reg_rtx (compute_mode);
4597           }
4598
4599         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4600                                  quotient, remainder, 0))
4601           {
4602             /* This could be computed with a branch-less sequence.
4603                Save that for later.  */
4604             rtx tem;
4605             rtx_code_label *label = gen_label_rtx ();
4606             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4607             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4608                                 NULL_RTX, 0, OPTAB_WIDEN);
4609             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4610             expand_dec (quotient, const1_rtx);
4611             expand_inc (remainder, op1);
4612             emit_label (label);
4613             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4614           }
4615
4616         /* No luck with division elimination or divmod.  Have to do it
4617            by conditionally adjusting op0 *and* the result.  */
4618         {
4619           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4620           rtx adjusted_op0;
4621           rtx tem;
4622
4623           quotient = gen_reg_rtx (compute_mode);
4624           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4625           label1 = gen_label_rtx ();
4626           label2 = gen_label_rtx ();
4627           label3 = gen_label_rtx ();
4628           label4 = gen_label_rtx ();
4629           label5 = gen_label_rtx ();
4630           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4631           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4632           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4633                               quotient, 0, OPTAB_LIB_WIDEN);
4634           if (tem != quotient)
4635             emit_move_insn (quotient, tem);
4636           emit_jump_insn (targetm.gen_jump (label5));
4637           emit_barrier ();
4638           emit_label (label1);
4639           expand_inc (adjusted_op0, const1_rtx);
4640           emit_jump_insn (targetm.gen_jump (label4));
4641           emit_barrier ();
4642           emit_label (label2);
4643           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4644           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4645                               quotient, 0, OPTAB_LIB_WIDEN);
4646           if (tem != quotient)
4647             emit_move_insn (quotient, tem);
4648           emit_jump_insn (targetm.gen_jump (label5));
4649           emit_barrier ();
4650           emit_label (label3);
4651           expand_dec (adjusted_op0, const1_rtx);
4652           emit_label (label4);
4653           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4654                               quotient, 0, OPTAB_LIB_WIDEN);
4655           if (tem != quotient)
4656             emit_move_insn (quotient, tem);
4657           expand_dec (quotient, const1_rtx);
4658           emit_label (label5);
4659         }
4660         break;
4661
4662       case CEIL_DIV_EXPR:
4663       case CEIL_MOD_EXPR:
4664         if (unsignedp)
4665           {
4666             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4667               {
4668                 rtx t1, t2, t3;
4669                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4670                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4671                                    floor_log2 (d), tquotient, 1);
4672                 t2 = expand_binop (compute_mode, and_optab, op0,
4673                                    gen_int_mode (d - 1, compute_mode),
4674                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4675                 t3 = gen_reg_rtx (compute_mode);
4676                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4677                                       compute_mode, 1, 1);
4678                 if (t3 == 0)
4679                   {
4680                     rtx_code_label *lab;
4681                     lab = gen_label_rtx ();
4682                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4683                     expand_inc (t1, const1_rtx);
4684                     emit_label (lab);
4685                     quotient = t1;
4686                   }
4687                 else
4688                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4689                                                           t1, t3),
4690                                             tquotient);
4691                 break;
4692               }
4693
4694             /* Try using an instruction that produces both the quotient and
4695                remainder, using truncation.  We can easily compensate the
4696                quotient or remainder to get ceiling rounding, once we have the
4697                remainder.  Notice that we compute also the final remainder
4698                value here, and return the result right away.  */
4699             if (target == 0 || GET_MODE (target) != compute_mode)
4700               target = gen_reg_rtx (compute_mode);
4701
4702             if (rem_flag)
4703               {
4704                 remainder = (REG_P (target)
4705                              ? target : gen_reg_rtx (compute_mode));
4706                 quotient = gen_reg_rtx (compute_mode);
4707               }
4708             else
4709               {
4710                 quotient = (REG_P (target)
4711                             ? target : gen_reg_rtx (compute_mode));
4712                 remainder = gen_reg_rtx (compute_mode);
4713               }
4714
4715             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4716                                      remainder, 1))
4717               {
4718                 /* This could be computed with a branch-less sequence.
4719                    Save that for later.  */
4720                 rtx_code_label *label = gen_label_rtx ();
4721                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4722                                  compute_mode, label);
4723                 expand_inc (quotient, const1_rtx);
4724                 expand_dec (remainder, op1);
4725                 emit_label (label);
4726                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4727               }
4728
4729             /* No luck with division elimination or divmod.  Have to do it
4730                by conditionally adjusting op0 *and* the result.  */
4731             {
4732               rtx_code_label *label1, *label2;
4733               rtx adjusted_op0, tem;
4734
4735               quotient = gen_reg_rtx (compute_mode);
4736               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4737               label1 = gen_label_rtx ();
4738               label2 = gen_label_rtx ();
4739               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4740                                compute_mode, label1);
4741               emit_move_insn  (quotient, const0_rtx);
4742               emit_jump_insn (targetm.gen_jump (label2));
4743               emit_barrier ();
4744               emit_label (label1);
4745               expand_dec (adjusted_op0, const1_rtx);
4746               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4747                                   quotient, 1, OPTAB_LIB_WIDEN);
4748               if (tem != quotient)
4749                 emit_move_insn (quotient, tem);
4750               expand_inc (quotient, const1_rtx);
4751               emit_label (label2);
4752             }
4753           }
4754         else /* signed */
4755           {
4756             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4757                 && INTVAL (op1) >= 0)
4758               {
4759                 /* This is extremely similar to the code for the unsigned case
4760                    above.  For 2.7 we should merge these variants, but for
4761                    2.6.1 I don't want to touch the code for unsigned since that
4762                    get used in C.  The signed case will only be used by other
4763                    languages (Ada).  */
4764
4765                 rtx t1, t2, t3;
4766                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4767                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4768                                    floor_log2 (d), tquotient, 0);
4769                 t2 = expand_binop (compute_mode, and_optab, op0,
4770                                    gen_int_mode (d - 1, compute_mode),
4771                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4772                 t3 = gen_reg_rtx (compute_mode);
4773                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4774                                       compute_mode, 1, 1);
4775                 if (t3 == 0)
4776                   {
4777                     rtx_code_label *lab;
4778                     lab = gen_label_rtx ();
4779                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4780                     expand_inc (t1, const1_rtx);
4781                     emit_label (lab);
4782                     quotient = t1;
4783                   }
4784                 else
4785                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4786                                                           t1, t3),
4787                                             tquotient);
4788                 break;
4789               }
4790
4791             /* Try using an instruction that produces both the quotient and
4792                remainder, using truncation.  We can easily compensate the
4793                quotient or remainder to get ceiling rounding, once we have the
4794                remainder.  Notice that we compute also the final remainder
4795                value here, and return the result right away.  */
4796             if (target == 0 || GET_MODE (target) != compute_mode)
4797               target = gen_reg_rtx (compute_mode);
4798             if (rem_flag)
4799               {
4800                 remainder= (REG_P (target)
4801                             ? target : gen_reg_rtx (compute_mode));
4802                 quotient = gen_reg_rtx (compute_mode);
4803               }
4804             else
4805               {
4806                 quotient = (REG_P (target)
4807                             ? target : gen_reg_rtx (compute_mode));
4808                 remainder = gen_reg_rtx (compute_mode);
4809               }
4810
4811             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4812                                      remainder, 0))
4813               {
4814                 /* This could be computed with a branch-less sequence.
4815                    Save that for later.  */
4816                 rtx tem;
4817                 rtx_code_label *label = gen_label_rtx ();
4818                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4819                                  compute_mode, label);
4820                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4821                                     NULL_RTX, 0, OPTAB_WIDEN);
4822                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4823                 expand_inc (quotient, const1_rtx);
4824                 expand_dec (remainder, op1);
4825                 emit_label (label);
4826                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4827               }
4828
4829             /* No luck with division elimination or divmod.  Have to do it
4830                by conditionally adjusting op0 *and* the result.  */
4831             {
4832               rtx_code_label *label1, *label2, *label3, *label4, *label5;
4833               rtx adjusted_op0;
4834               rtx tem;
4835
4836               quotient = gen_reg_rtx (compute_mode);
4837               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4838               label1 = gen_label_rtx ();
4839               label2 = gen_label_rtx ();
4840               label3 = gen_label_rtx ();
4841               label4 = gen_label_rtx ();
4842               label5 = gen_label_rtx ();
4843               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4844               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4845                                compute_mode, label1);
4846               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4847                                   quotient, 0, OPTAB_LIB_WIDEN);
4848               if (tem != quotient)
4849                 emit_move_insn (quotient, tem);
4850               emit_jump_insn (targetm.gen_jump (label5));
4851               emit_barrier ();
4852               emit_label (label1);
4853               expand_dec (adjusted_op0, const1_rtx);
4854               emit_jump_insn (targetm.gen_jump (label4));
4855               emit_barrier ();
4856               emit_label (label2);
4857               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4858                                compute_mode, label3);
4859               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4860                                   quotient, 0, OPTAB_LIB_WIDEN);
4861               if (tem != quotient)
4862                 emit_move_insn (quotient, tem);
4863               emit_jump_insn (targetm.gen_jump (label5));
4864               emit_barrier ();
4865               emit_label (label3);
4866               expand_inc (adjusted_op0, const1_rtx);
4867               emit_label (label4);
4868               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4869                                   quotient, 0, OPTAB_LIB_WIDEN);
4870               if (tem != quotient)
4871                 emit_move_insn (quotient, tem);
4872               expand_inc (quotient, const1_rtx);
4873               emit_label (label5);
4874             }
4875           }
4876         break;
4877
4878       case EXACT_DIV_EXPR:
4879         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4880           {
4881             HOST_WIDE_INT d = INTVAL (op1);
4882             unsigned HOST_WIDE_INT ml;
4883             int pre_shift;
4884             rtx t1;
4885
4886             pre_shift = ctz_or_zero (d);
4887             ml = invert_mod2n (d >> pre_shift, size);
4888             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4889                                pre_shift, NULL_RTX, unsignedp);
4890             quotient = expand_mult (compute_mode, t1,
4891                                     gen_int_mode (ml, compute_mode),
4892                                     NULL_RTX, 1);
4893
4894             insn = get_last_insn ();
4895             set_dst_reg_note (insn, REG_EQUAL,
4896                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4897                                               compute_mode, op0, op1),
4898                               quotient);
4899           }
4900         break;
4901
4902       case ROUND_DIV_EXPR:
4903       case ROUND_MOD_EXPR:
4904         if (unsignedp)
4905           {
4906             rtx tem;
4907             rtx_code_label *label;
4908             label = gen_label_rtx ();
4909             quotient = gen_reg_rtx (compute_mode);
4910             remainder = gen_reg_rtx (compute_mode);
4911             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4912               {
4913                 rtx tem;
4914                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4915                                          quotient, 1, OPTAB_LIB_WIDEN);
4916                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4917                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4918                                           remainder, 1, OPTAB_LIB_WIDEN);
4919               }
4920             tem = plus_constant (compute_mode, op1, -1);
4921             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4922             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4923             expand_inc (quotient, const1_rtx);
4924             expand_dec (remainder, op1);
4925             emit_label (label);
4926           }
4927         else
4928           {
4929             rtx abs_rem, abs_op1, tem, mask;
4930             rtx_code_label *label;
4931             label = gen_label_rtx ();
4932             quotient = gen_reg_rtx (compute_mode);
4933             remainder = gen_reg_rtx (compute_mode);
4934             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4935               {
4936                 rtx tem;
4937                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4938                                          quotient, 0, OPTAB_LIB_WIDEN);
4939                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4940                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4941                                           remainder, 0, OPTAB_LIB_WIDEN);
4942               }
4943             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4944             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4945             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4946                                 1, NULL_RTX, 1);
4947             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4948             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4949                                 NULL_RTX, 0, OPTAB_WIDEN);
4950             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4951                                  size - 1, NULL_RTX, 0);
4952             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4953                                 NULL_RTX, 0, OPTAB_WIDEN);
4954             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4955                                 NULL_RTX, 0, OPTAB_WIDEN);
4956             expand_inc (quotient, tem);
4957             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4958                                 NULL_RTX, 0, OPTAB_WIDEN);
4959             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4960                                 NULL_RTX, 0, OPTAB_WIDEN);
4961             expand_dec (remainder, tem);
4962             emit_label (label);
4963           }
4964         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4965
4966       default:
4967         gcc_unreachable ();
4968       }
4969
4970   if (quotient == 0)
4971     {
4972       if (target && GET_MODE (target) != compute_mode)
4973         target = 0;
4974
4975       if (rem_flag)
4976         {
4977           /* Try to produce the remainder without producing the quotient.
4978              If we seem to have a divmod pattern that does not require widening,
4979              don't try widening here.  We should really have a WIDEN argument
4980              to expand_twoval_binop, since what we'd really like to do here is
4981              1) try a mod insn in compute_mode
4982              2) try a divmod insn in compute_mode
4983              3) try a div insn in compute_mode and multiply-subtract to get
4984                 remainder
4985              4) try the same things with widening allowed.  */
4986           remainder
4987             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4988                                  op0, op1, target,
4989                                  unsignedp,
4990                                  ((optab_handler (optab2, compute_mode)
4991                                    != CODE_FOR_nothing)
4992                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4993           if (remainder == 0)
4994             {
4995               /* No luck there.  Can we do remainder and divide at once
4996                  without a library call?  */
4997               remainder = gen_reg_rtx (compute_mode);
4998               if (! expand_twoval_binop ((unsignedp
4999                                           ? udivmod_optab
5000                                           : sdivmod_optab),
5001                                          op0, op1,
5002                                          NULL_RTX, remainder, unsignedp))
5003                 remainder = 0;
5004             }
5005
5006           if (remainder)
5007             return gen_lowpart (mode, remainder);
5008         }
5009
5010       /* Produce the quotient.  Try a quotient insn, but not a library call.
5011          If we have a divmod in this mode, use it in preference to widening
5012          the div (for this test we assume it will not fail). Note that optab2
5013          is set to the one of the two optabs that the call below will use.  */
5014       quotient
5015         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5016                              op0, op1, rem_flag ? NULL_RTX : target,
5017                              unsignedp,
5018                              ((optab_handler (optab2, compute_mode)
5019                                != CODE_FOR_nothing)
5020                               ? OPTAB_DIRECT : OPTAB_WIDEN));
5021
5022       if (quotient == 0)
5023         {
5024           /* No luck there.  Try a quotient-and-remainder insn,
5025              keeping the quotient alone.  */
5026           quotient = gen_reg_rtx (compute_mode);
5027           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5028                                      op0, op1,
5029                                      quotient, NULL_RTX, unsignedp))
5030             {
5031               quotient = 0;
5032               if (! rem_flag)
5033                 /* Still no luck.  If we are not computing the remainder,
5034                    use a library call for the quotient.  */
5035                 quotient = sign_expand_binop (compute_mode,
5036                                               udiv_optab, sdiv_optab,
5037                                               op0, op1, target,
5038                                               unsignedp, OPTAB_LIB_WIDEN);
5039             }
5040         }
5041     }
5042
5043   if (rem_flag)
5044     {
5045       if (target && GET_MODE (target) != compute_mode)
5046         target = 0;
5047
5048       if (quotient == 0)
5049         {
5050           /* No divide instruction either.  Use library for remainder.  */
5051           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5052                                          op0, op1, target,
5053                                          unsignedp, OPTAB_LIB_WIDEN);
5054           /* No remainder function.  Try a quotient-and-remainder
5055              function, keeping the remainder.  */
5056           if (!remainder)
5057             {
5058               remainder = gen_reg_rtx (compute_mode);
5059               if (!expand_twoval_binop_libfunc
5060                   (unsignedp ? udivmod_optab : sdivmod_optab,
5061                    op0, op1,
5062                    NULL_RTX, remainder,
5063                    unsignedp ? UMOD : MOD))
5064                 remainder = NULL_RTX;
5065             }
5066         }
5067       else
5068         {
5069           /* We divided.  Now finish doing X - Y * (X / Y).  */
5070           remainder = expand_mult (compute_mode, quotient, op1,
5071                                    NULL_RTX, unsignedp);
5072           remainder = expand_binop (compute_mode, sub_optab, op0,
5073                                     remainder, target, unsignedp,
5074                                     OPTAB_LIB_WIDEN);
5075         }
5076     }
5077
5078   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5079 }
5080 \f
5081 /* Return a tree node with data type TYPE, describing the value of X.
5082    Usually this is an VAR_DECL, if there is no obvious better choice.
5083    X may be an expression, however we only support those expressions
5084    generated by loop.c.  */
5085
5086 tree
5087 make_tree (tree type, rtx x)
5088 {
5089   tree t;
5090
5091   switch (GET_CODE (x))
5092     {
5093     case CONST_INT:
5094     case CONST_WIDE_INT:
5095       t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5096       return t;
5097
5098     case CONST_DOUBLE:
5099       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5100       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5101         t = wide_int_to_tree (type,
5102                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5103                                                     HOST_BITS_PER_WIDE_INT * 2));
5104       else
5105         t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5106
5107       return t;
5108
5109     case CONST_VECTOR:
5110       {
5111         int units = CONST_VECTOR_NUNITS (x);
5112         tree itype = TREE_TYPE (type);
5113         tree *elts;
5114         int i;
5115
5116         /* Build a tree with vector elements.  */
5117         elts = XALLOCAVEC (tree, units);
5118         for (i = units - 1; i >= 0; --i)
5119           {
5120             rtx elt = CONST_VECTOR_ELT (x, i);
5121             elts[i] = make_tree (itype, elt);
5122           }
5123
5124         return build_vector (type, elts);
5125       }
5126
5127     case PLUS:
5128       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5129                           make_tree (type, XEXP (x, 1)));
5130
5131     case MINUS:
5132       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5133                           make_tree (type, XEXP (x, 1)));
5134
5135     case NEG:
5136       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5137
5138     case MULT:
5139       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5140                           make_tree (type, XEXP (x, 1)));
5141
5142     case ASHIFT:
5143       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5144                           make_tree (type, XEXP (x, 1)));
5145
5146     case LSHIFTRT:
5147       t = unsigned_type_for (type);
5148       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5149                                          make_tree (t, XEXP (x, 0)),
5150                                          make_tree (type, XEXP (x, 1))));
5151
5152     case ASHIFTRT:
5153       t = signed_type_for (type);
5154       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5155                                          make_tree (t, XEXP (x, 0)),
5156                                          make_tree (type, XEXP (x, 1))));
5157
5158     case DIV:
5159       if (TREE_CODE (type) != REAL_TYPE)
5160         t = signed_type_for (type);
5161       else
5162         t = type;
5163
5164       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5165                                          make_tree (t, XEXP (x, 0)),
5166                                          make_tree (t, XEXP (x, 1))));
5167     case UDIV:
5168       t = unsigned_type_for (type);
5169       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5170                                          make_tree (t, XEXP (x, 0)),
5171                                          make_tree (t, XEXP (x, 1))));
5172
5173     case SIGN_EXTEND:
5174     case ZERO_EXTEND:
5175       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5176                                           GET_CODE (x) == ZERO_EXTEND);
5177       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5178
5179     case CONST:
5180       return make_tree (type, XEXP (x, 0));
5181
5182     case SYMBOL_REF:
5183       t = SYMBOL_REF_DECL (x);
5184       if (t)
5185         return fold_convert (type, build_fold_addr_expr (t));
5186       /* fall through.  */
5187
5188     default:
5189       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5190
5191       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5192          address mode to pointer mode.  */
5193       if (POINTER_TYPE_P (type))
5194         x = convert_memory_address_addr_space
5195               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5196
5197       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5198          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5199       t->decl_with_rtl.rtl = x;
5200
5201       return t;
5202     }
5203 }
5204 \f
5205 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5206    and returning TARGET.
5207
5208    If TARGET is 0, a pseudo-register or constant is returned.  */
5209
5210 rtx
5211 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5212 {
5213   rtx tem = 0;
5214
5215   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5216     tem = simplify_binary_operation (AND, mode, op0, op1);
5217   if (tem == 0)
5218     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5219
5220   if (target == 0)
5221     target = tem;
5222   else if (tem != target)
5223     emit_move_insn (target, tem);
5224   return target;
5225 }
5226
5227 /* Helper function for emit_store_flag.  */
5228 rtx
5229 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5230              machine_mode mode, machine_mode compare_mode,
5231              int unsignedp, rtx x, rtx y, int normalizep,
5232              machine_mode target_mode)
5233 {
5234   struct expand_operand ops[4];
5235   rtx op0, comparison, subtarget;
5236   rtx_insn *last;
5237   machine_mode result_mode = targetm.cstore_mode (icode);
5238
5239   last = get_last_insn ();
5240   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5241   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5242   if (!x || !y)
5243     {
5244       delete_insns_since (last);
5245       return NULL_RTX;
5246     }
5247
5248   if (target_mode == VOIDmode)
5249     target_mode = result_mode;
5250   if (!target)
5251     target = gen_reg_rtx (target_mode);
5252
5253   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5254
5255   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5256   create_fixed_operand (&ops[1], comparison);
5257   create_fixed_operand (&ops[2], x);
5258   create_fixed_operand (&ops[3], y);
5259   if (!maybe_expand_insn (icode, 4, ops))
5260     {
5261       delete_insns_since (last);
5262       return NULL_RTX;
5263     }
5264   subtarget = ops[0].value;
5265
5266   /* If we are converting to a wider mode, first convert to
5267      TARGET_MODE, then normalize.  This produces better combining
5268      opportunities on machines that have a SIGN_EXTRACT when we are
5269      testing a single bit.  This mostly benefits the 68k.
5270
5271      If STORE_FLAG_VALUE does not have the sign bit set when
5272      interpreted in MODE, we can do this conversion as unsigned, which
5273      is usually more efficient.  */
5274   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5275     {
5276       convert_move (target, subtarget,
5277                     val_signbit_known_clear_p (result_mode,
5278                                                STORE_FLAG_VALUE));
5279       op0 = target;
5280       result_mode = target_mode;
5281     }
5282   else
5283     op0 = subtarget;
5284
5285   /* If we want to keep subexpressions around, don't reuse our last
5286      target.  */
5287   if (optimize)
5288     subtarget = 0;
5289
5290   /* Now normalize to the proper value in MODE.  Sometimes we don't
5291      have to do anything.  */
5292   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5293     ;
5294   /* STORE_FLAG_VALUE might be the most negative number, so write
5295      the comparison this way to avoid a compiler-time warning.  */
5296   else if (- normalizep == STORE_FLAG_VALUE)
5297     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5298
5299   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5300      it hard to use a value of just the sign bit due to ANSI integer
5301      constant typing rules.  */
5302   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5303     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5304                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5305                         normalizep == 1);
5306   else
5307     {
5308       gcc_assert (STORE_FLAG_VALUE & 1);
5309
5310       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5311       if (normalizep == -1)
5312         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5313     }
5314
5315   /* If we were converting to a smaller mode, do the conversion now.  */
5316   if (target_mode != result_mode)
5317     {
5318       convert_move (target, op0, 0);
5319       return target;
5320     }
5321   else
5322     return op0;
5323 }
5324
5325
5326 /* A subroutine of emit_store_flag only including "tricks" that do not
5327    need a recursive call.  These are kept separate to avoid infinite
5328    loops.  */
5329
5330 static rtx
5331 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5332                    machine_mode mode, int unsignedp, int normalizep,
5333                    machine_mode target_mode)
5334 {
5335   rtx subtarget;
5336   enum insn_code icode;
5337   machine_mode compare_mode;
5338   enum mode_class mclass;
5339   enum rtx_code scode;
5340
5341   if (unsignedp)
5342     code = unsigned_condition (code);
5343   scode = swap_condition (code);
5344
5345   /* If one operand is constant, make it the second one.  Only do this
5346      if the other operand is not constant as well.  */
5347
5348   if (swap_commutative_operands_p (op0, op1))
5349     {
5350       std::swap (op0, op1);
5351       code = swap_condition (code);
5352     }
5353
5354   if (mode == VOIDmode)
5355     mode = GET_MODE (op0);
5356
5357   /* For some comparisons with 1 and -1, we can convert this to
5358      comparisons with zero.  This will often produce more opportunities for
5359      store-flag insns.  */
5360
5361   switch (code)
5362     {
5363     case LT:
5364       if (op1 == const1_rtx)
5365         op1 = const0_rtx, code = LE;
5366       break;
5367     case LE:
5368       if (op1 == constm1_rtx)
5369         op1 = const0_rtx, code = LT;
5370       break;
5371     case GE:
5372       if (op1 == const1_rtx)
5373         op1 = const0_rtx, code = GT;
5374       break;
5375     case GT:
5376       if (op1 == constm1_rtx)
5377         op1 = const0_rtx, code = GE;
5378       break;
5379     case GEU:
5380       if (op1 == const1_rtx)
5381         op1 = const0_rtx, code = NE;
5382       break;
5383     case LTU:
5384       if (op1 == const1_rtx)
5385         op1 = const0_rtx, code = EQ;
5386       break;
5387     default:
5388       break;
5389     }
5390
5391   /* If we are comparing a double-word integer with zero or -1, we can
5392      convert the comparison into one involving a single word.  */
5393   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5394       && GET_MODE_CLASS (mode) == MODE_INT
5395       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5396     {
5397       rtx tem;
5398       if ((code == EQ || code == NE)
5399           && (op1 == const0_rtx || op1 == constm1_rtx))
5400         {
5401           rtx op00, op01;
5402
5403           /* Do a logical OR or AND of the two words and compare the
5404              result.  */
5405           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5406           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5407           tem = expand_binop (word_mode,
5408                               op1 == const0_rtx ? ior_optab : and_optab,
5409                               op00, op01, NULL_RTX, unsignedp,
5410                               OPTAB_DIRECT);
5411
5412           if (tem != 0)
5413             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5414                                    unsignedp, normalizep);
5415         }
5416       else if ((code == LT || code == GE) && op1 == const0_rtx)
5417         {
5418           rtx op0h;
5419
5420           /* If testing the sign bit, can just test on high word.  */
5421           op0h = simplify_gen_subreg (word_mode, op0, mode,
5422                                       subreg_highpart_offset (word_mode,
5423                                                               mode));
5424           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5425                                  unsignedp, normalizep);
5426         }
5427       else
5428         tem = NULL_RTX;
5429
5430       if (tem)
5431         {
5432           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5433             return tem;
5434           if (!target)
5435             target = gen_reg_rtx (target_mode);
5436
5437           convert_move (target, tem,
5438                         !val_signbit_known_set_p (word_mode,
5439                                                   (normalizep ? normalizep
5440                                                    : STORE_FLAG_VALUE)));
5441           return target;
5442         }
5443     }
5444
5445   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5446      complement of A (for GE) and shifting the sign bit to the low bit.  */
5447   if (op1 == const0_rtx && (code == LT || code == GE)
5448       && GET_MODE_CLASS (mode) == MODE_INT
5449       && (normalizep || STORE_FLAG_VALUE == 1
5450           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5451     {
5452       subtarget = target;
5453
5454       if (!target)
5455         target_mode = mode;
5456
5457       /* If the result is to be wider than OP0, it is best to convert it
5458          first.  If it is to be narrower, it is *incorrect* to convert it
5459          first.  */
5460       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5461         {
5462           op0 = convert_modes (target_mode, mode, op0, 0);
5463           mode = target_mode;
5464         }
5465
5466       if (target_mode != mode)
5467         subtarget = 0;
5468
5469       if (code == GE)
5470         op0 = expand_unop (mode, one_cmpl_optab, op0,
5471                            ((STORE_FLAG_VALUE == 1 || normalizep)
5472                             ? 0 : subtarget), 0);
5473
5474       if (STORE_FLAG_VALUE == 1 || normalizep)
5475         /* If we are supposed to produce a 0/1 value, we want to do
5476            a logical shift from the sign bit to the low-order bit; for
5477            a -1/0 value, we do an arithmetic shift.  */
5478         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5479                             GET_MODE_BITSIZE (mode) - 1,
5480                             subtarget, normalizep != -1);
5481
5482       if (mode != target_mode)
5483         op0 = convert_modes (target_mode, mode, op0, 0);
5484
5485       return op0;
5486     }
5487
5488   mclass = GET_MODE_CLASS (mode);
5489   for (compare_mode = mode; compare_mode != VOIDmode;
5490        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5491     {
5492      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5493      icode = optab_handler (cstore_optab, optab_mode);
5494      if (icode != CODE_FOR_nothing)
5495         {
5496           do_pending_stack_adjust ();
5497           rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5498                                  unsignedp, op0, op1, normalizep, target_mode);
5499           if (tem)
5500             return tem;
5501
5502           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5503             {
5504               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5505                                  unsignedp, op1, op0, normalizep, target_mode);
5506               if (tem)
5507                 return tem;
5508             }
5509           break;
5510         }
5511     }
5512
5513   return 0;
5514 }
5515
5516 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5517    and storing in TARGET.  Normally return TARGET.
5518    Return 0 if that cannot be done.
5519
5520    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5521    it is VOIDmode, they cannot both be CONST_INT.
5522
5523    UNSIGNEDP is for the case where we have to widen the operands
5524    to perform the operation.  It says to use zero-extension.
5525
5526    NORMALIZEP is 1 if we should convert the result to be either zero
5527    or one.  Normalize is -1 if we should convert the result to be
5528    either zero or -1.  If NORMALIZEP is zero, the result will be left
5529    "raw" out of the scc insn.  */
5530
5531 rtx
5532 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5533                  machine_mode mode, int unsignedp, int normalizep)
5534 {
5535   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5536   enum rtx_code rcode;
5537   rtx subtarget;
5538   rtx tem, trueval;
5539   rtx_insn *last;
5540
5541   /* If we compare constants, we shouldn't use a store-flag operation,
5542      but a constant load.  We can get there via the vanilla route that
5543      usually generates a compare-branch sequence, but will in this case
5544      fold the comparison to a constant, and thus elide the branch.  */
5545   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5546     return NULL_RTX;
5547
5548   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5549                            target_mode);
5550   if (tem)
5551     return tem;
5552
5553   /* If we reached here, we can't do this with a scc insn, however there
5554      are some comparisons that can be done in other ways.  Don't do any
5555      of these cases if branches are very cheap.  */
5556   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5557     return 0;
5558
5559   /* See what we need to return.  We can only return a 1, -1, or the
5560      sign bit.  */
5561
5562   if (normalizep == 0)
5563     {
5564       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5565         normalizep = STORE_FLAG_VALUE;
5566
5567       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5568         ;
5569       else
5570         return 0;
5571     }
5572
5573   last = get_last_insn ();
5574
5575   /* If optimizing, use different pseudo registers for each insn, instead
5576      of reusing the same pseudo.  This leads to better CSE, but slows
5577      down the compiler, since there are more pseudos */
5578   subtarget = (!optimize
5579                && (target_mode == mode)) ? target : NULL_RTX;
5580   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5581
5582   /* For floating-point comparisons, try the reverse comparison or try
5583      changing the "orderedness" of the comparison.  */
5584   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5585     {
5586       enum rtx_code first_code;
5587       bool and_them;
5588
5589       rcode = reverse_condition_maybe_unordered (code);
5590       if (can_compare_p (rcode, mode, ccp_store_flag)
5591           && (code == ORDERED || code == UNORDERED
5592               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5593               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5594         {
5595           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5596                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5597
5598           /* For the reverse comparison, use either an addition or a XOR.  */
5599           if (want_add
5600               && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5601                            optimize_insn_for_speed_p ()) == 0)
5602             {
5603               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5604                                        STORE_FLAG_VALUE, target_mode);
5605               if (tem)
5606                 return expand_binop (target_mode, add_optab, tem,
5607                                      gen_int_mode (normalizep, target_mode),
5608                                      target, 0, OPTAB_WIDEN);
5609             }
5610           else if (!want_add
5611                    && rtx_cost (trueval, mode, XOR, 1,
5612                                 optimize_insn_for_speed_p ()) == 0)
5613             {
5614               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5615                                        normalizep, target_mode);
5616               if (tem)
5617                 return expand_binop (target_mode, xor_optab, tem, trueval,
5618                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5619             }
5620         }
5621
5622       delete_insns_since (last);
5623
5624       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5625       if (code == ORDERED || code == UNORDERED)
5626         return 0;
5627
5628       and_them = split_comparison (code, mode, &first_code, &code);
5629
5630       /* If there are no NaNs, the first comparison should always fall through.
5631          Effectively change the comparison to the other one.  */
5632       if (!HONOR_NANS (mode))
5633         {
5634           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5635           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5636                                     target_mode);
5637         }
5638
5639       if (!HAVE_conditional_move)
5640         return 0;
5641
5642       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5643          conditional move.  */
5644       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5645                                normalizep, target_mode);
5646       if (tem == 0)
5647         return 0;
5648
5649       if (and_them)
5650         tem = emit_conditional_move (target, code, op0, op1, mode,
5651                                      tem, const0_rtx, GET_MODE (tem), 0);
5652       else
5653         tem = emit_conditional_move (target, code, op0, op1, mode,
5654                                      trueval, tem, GET_MODE (tem), 0);
5655
5656       if (tem == 0)
5657         delete_insns_since (last);
5658       return tem;
5659     }
5660
5661   /* The remaining tricks only apply to integer comparisons.  */
5662
5663   if (GET_MODE_CLASS (mode) != MODE_INT)
5664     return 0;
5665
5666   /* If this is an equality comparison of integers, we can try to exclusive-or
5667      (or subtract) the two operands and use a recursive call to try the
5668      comparison with zero.  Don't do any of these cases if branches are
5669      very cheap.  */
5670
5671   if ((code == EQ || code == NE) && op1 != const0_rtx)
5672     {
5673       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5674                           OPTAB_WIDEN);
5675
5676       if (tem == 0)
5677         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5678                             OPTAB_WIDEN);
5679       if (tem != 0)
5680         tem = emit_store_flag (target, code, tem, const0_rtx,
5681                                mode, unsignedp, normalizep);
5682       if (tem != 0)
5683         return tem;
5684
5685       delete_insns_since (last);
5686     }
5687
5688   /* For integer comparisons, try the reverse comparison.  However, for
5689      small X and if we'd have anyway to extend, implementing "X != 0"
5690      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5691   rcode = reverse_condition (code);
5692   if (can_compare_p (rcode, mode, ccp_store_flag)
5693       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5694             && code == NE
5695             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5696             && op1 == const0_rtx))
5697     {
5698       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5699                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5700
5701       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5702       if (want_add
5703           && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5704                        optimize_insn_for_speed_p ()) == 0)
5705         {
5706           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5707                                    STORE_FLAG_VALUE, target_mode);
5708           if (tem != 0)
5709             tem = expand_binop (target_mode, add_optab, tem,
5710                                 gen_int_mode (normalizep, target_mode),
5711                                 target, 0, OPTAB_WIDEN);
5712         }
5713       else if (!want_add
5714                && rtx_cost (trueval, mode, XOR, 1,
5715                             optimize_insn_for_speed_p ()) == 0)
5716         {
5717           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5718                                    normalizep, target_mode);
5719           if (tem != 0)
5720             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5721                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5722         }
5723
5724       if (tem != 0)
5725         return tem;
5726       delete_insns_since (last);
5727     }
5728
5729   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5730      the constant zero.  Reject all other comparisons at this point.  Only
5731      do LE and GT if branches are expensive since they are expensive on
5732      2-operand machines.  */
5733
5734   if (op1 != const0_rtx
5735       || (code != EQ && code != NE
5736           && (BRANCH_COST (optimize_insn_for_speed_p (),
5737                            false) <= 1 || (code != LE && code != GT))))
5738     return 0;
5739
5740   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5741      do the necessary operation below.  */
5742
5743   tem = 0;
5744
5745   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5746      the sign bit set.  */
5747
5748   if (code == LE)
5749     {
5750       /* This is destructive, so SUBTARGET can't be OP0.  */
5751       if (rtx_equal_p (subtarget, op0))
5752         subtarget = 0;
5753
5754       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5755                           OPTAB_WIDEN);
5756       if (tem)
5757         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5758                             OPTAB_WIDEN);
5759     }
5760
5761   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5762      number of bits in the mode of OP0, minus one.  */
5763
5764   if (code == GT)
5765     {
5766       if (rtx_equal_p (subtarget, op0))
5767         subtarget = 0;
5768
5769       tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5770                                 GET_MODE_BITSIZE (mode) - 1,
5771                                 subtarget, 0);
5772       if (tem)
5773         tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5774                             OPTAB_WIDEN);
5775     }
5776
5777   if (code == EQ || code == NE)
5778     {
5779       /* For EQ or NE, one way to do the comparison is to apply an operation
5780          that converts the operand into a positive number if it is nonzero
5781          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5782          for NE we negate.  This puts the result in the sign bit.  Then we
5783          normalize with a shift, if needed.
5784
5785          Two operations that can do the above actions are ABS and FFS, so try
5786          them.  If that doesn't work, and MODE is smaller than a full word,
5787          we can use zero-extension to the wider mode (an unsigned conversion)
5788          as the operation.  */
5789
5790       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5791          that is compensated by the subsequent overflow when subtracting
5792          one / negating.  */
5793
5794       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5795         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5796       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5797         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5798       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5799         {
5800           tem = convert_modes (word_mode, mode, op0, 1);
5801           mode = word_mode;
5802         }
5803
5804       if (tem != 0)
5805         {
5806           if (code == EQ)
5807             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5808                                 0, OPTAB_WIDEN);
5809           else
5810             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5811         }
5812
5813       /* If we couldn't do it that way, for NE we can "or" the two's complement
5814          of the value with itself.  For EQ, we take the one's complement of
5815          that "or", which is an extra insn, so we only handle EQ if branches
5816          are expensive.  */
5817
5818       if (tem == 0
5819           && (code == NE
5820               || BRANCH_COST (optimize_insn_for_speed_p (),
5821                               false) > 1))
5822         {
5823           if (rtx_equal_p (subtarget, op0))
5824             subtarget = 0;
5825
5826           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5827           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5828                               OPTAB_WIDEN);
5829
5830           if (tem && code == EQ)
5831             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5832         }
5833     }
5834
5835   if (tem && normalizep)
5836     tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5837                               GET_MODE_BITSIZE (mode) - 1,
5838                               subtarget, normalizep == 1);
5839
5840   if (tem)
5841     {
5842       if (!target)
5843         ;
5844       else if (GET_MODE (tem) != target_mode)
5845         {
5846           convert_move (target, tem, 0);
5847           tem = target;
5848         }
5849       else if (!subtarget)
5850         {
5851           emit_move_insn (target, tem);
5852           tem = target;
5853         }
5854     }
5855   else
5856     delete_insns_since (last);
5857
5858   return tem;
5859 }
5860
5861 /* Like emit_store_flag, but always succeeds.  */
5862
5863 rtx
5864 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5865                        machine_mode mode, int unsignedp, int normalizep)
5866 {
5867   rtx tem;
5868   rtx_code_label *label;
5869   rtx trueval, falseval;
5870
5871   /* First see if emit_store_flag can do the job.  */
5872   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5873   if (tem != 0)
5874     return tem;
5875
5876   if (!target)
5877     target = gen_reg_rtx (word_mode);
5878
5879   /* If this failed, we have to do this with set/compare/jump/set code.
5880      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5881   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5882   if (code == NE
5883       && GET_MODE_CLASS (mode) == MODE_INT
5884       && REG_P (target)
5885       && op0 == target
5886       && op1 == const0_rtx)
5887     {
5888       label = gen_label_rtx ();
5889       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5890                                NULL_RTX, NULL, label, -1);
5891       emit_move_insn (target, trueval);
5892       emit_label (label);
5893       return target;
5894     }
5895
5896   if (!REG_P (target)
5897       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5898     target = gen_reg_rtx (GET_MODE (target));
5899
5900   /* Jump in the right direction if the target cannot implement CODE
5901      but can jump on its reverse condition.  */
5902   falseval = const0_rtx;
5903   if (! can_compare_p (code, mode, ccp_jump)
5904       && (! FLOAT_MODE_P (mode)
5905           || code == ORDERED || code == UNORDERED
5906           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5907           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5908     {
5909       enum rtx_code rcode;
5910       if (FLOAT_MODE_P (mode))
5911         rcode = reverse_condition_maybe_unordered (code);
5912       else
5913         rcode = reverse_condition (code);
5914
5915       /* Canonicalize to UNORDERED for the libcall.  */
5916       if (can_compare_p (rcode, mode, ccp_jump)
5917           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5918         {
5919           falseval = trueval;
5920           trueval = const0_rtx;
5921           code = rcode;
5922         }
5923     }
5924
5925   emit_move_insn (target, trueval);
5926   label = gen_label_rtx ();
5927   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5928                            label, -1);
5929
5930   emit_move_insn (target, falseval);
5931   emit_label (label);
5932
5933   return target;
5934 }
5935 \f
5936 /* Perform possibly multi-word comparison and conditional jump to LABEL
5937    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5938    now a thin wrapper around do_compare_rtx_and_jump.  */
5939
5940 static void
5941 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5942                  rtx_code_label *label)
5943 {
5944   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5945   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
5946                            NULL, label, -1);
5947 }