Add support for ARMv8-R architecture
[official-gcc.git] / gcc / expmed.c
blob 9026472724c4086bd3771538fcbe07f0afb848e1
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2017 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "predict.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "optabs.h"
34 #include "emit-rtl.h"
35 #include "diagnostic-core.h"
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "dojump.h"
39 #include "explow.h"
40 #include "expr.h"
41 #include "langhooks.h"
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 rtx, bool);
53 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 rtx, bool);
56 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 unsigned HOST_WIDE_INT,
59 unsigned HOST_WIDE_INT,
60 rtx, bool);
61 static rtx extract_fixed_bit_field (machine_mode, rtx,
62 unsigned HOST_WIDE_INT,
63 unsigned HOST_WIDE_INT, rtx, int, bool);
64 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
65 unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT, rtx, int, bool);
67 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
68 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
69 unsigned HOST_WIDE_INT, int, bool);
70 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
71 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
72 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
74 /* Return a constant integer mask value of mode MODE with BITSIZE ones
75 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
76 The mask is truncated if necessary to the width of mode MODE. The
77 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
79 static inline rtx
80 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
82 return immed_wide_int_const
83 (wi::shifted_mask (bitpos, bitsize, complement,
84 GET_MODE_PRECISION (mode)), mode);
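/* For example, mask_rtx (SImode, 4, 8, false) is (const_int 0xff0), i.e.
   bits 4 through 11 set; with COMPLEMENT it is the 32-bit inverse of that
   mask, 0xfffff00f.  */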
87 /* Test whether a value is zero or a power of two.  */
88 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
89 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
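/* For example, 0, 1, 2 and 8 satisfy EXACT_POWER_OF_2_OR_ZERO_P, while
   6 does not (6 & 5 == 4).  */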
91 struct init_expmed_rtl
93 rtx reg;
94 rtx plus;
95 rtx neg;
96 rtx mult;
97 rtx sdiv;
98 rtx udiv;
99 rtx sdiv_32;
100 rtx smod_32;
101 rtx wide_mult;
102 rtx wide_lshr;
103 rtx wide_trunc;
104 rtx shift;
105 rtx shift_mult;
106 rtx shift_add;
107 rtx shift_sub0;
108 rtx shift_sub1;
109 rtx zext;
110 rtx trunc;
112 rtx pow2[MAX_BITS_PER_WORD];
113 rtx cint[MAX_BITS_PER_WORD];
116 static void
117 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
118 machine_mode from_mode, bool speed)
120 int to_size, from_size;
121 rtx which;
123 to_size = GET_MODE_PRECISION (to_mode);
124 from_size = GET_MODE_PRECISION (from_mode);
126 /* Most partial integers have a precision less than the "full"
127 integer they require for storage.  If one does not, reduce its
128 bit size by one for the comparison done here.  */
130 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
131 && pow2p_hwi (to_size))
132 to_size --;
133 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
134 && pow2p_hwi (from_size))
135 from_size --;
137 /* Assume cost of zero-extend and sign-extend is the same. */
138 which = (to_size < from_size ? all->trunc : all->zext);
140 PUT_MODE (all->reg, from_mode);
141 set_convert_cost (to_mode, from_mode, speed,
142 set_src_cost (which, to_mode, speed));
145 static void
146 init_expmed_one_mode (struct init_expmed_rtl *all,
147 machine_mode mode, int speed)
149 int m, n, mode_bitsize;
150 machine_mode mode_from;
152 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
154 PUT_MODE (all->reg, mode);
155 PUT_MODE (all->plus, mode);
156 PUT_MODE (all->neg, mode);
157 PUT_MODE (all->mult, mode);
158 PUT_MODE (all->sdiv, mode);
159 PUT_MODE (all->udiv, mode);
160 PUT_MODE (all->sdiv_32, mode);
161 PUT_MODE (all->smod_32, mode);
162 PUT_MODE (all->wide_trunc, mode);
163 PUT_MODE (all->shift, mode);
164 PUT_MODE (all->shift_mult, mode);
165 PUT_MODE (all->shift_add, mode);
166 PUT_MODE (all->shift_sub0, mode);
167 PUT_MODE (all->shift_sub1, mode);
168 PUT_MODE (all->zext, mode);
169 PUT_MODE (all->trunc, mode);
171 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
172 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
173 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
174 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
175 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
177 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
178 <= 2 * add_cost (speed, mode)));
179 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
180 <= 4 * add_cost (speed, mode)));
182 set_shift_cost (speed, mode, 0, 0);
184 int cost = add_cost (speed, mode);
185 set_shiftadd_cost (speed, mode, 0, cost);
186 set_shiftsub0_cost (speed, mode, 0, cost);
187 set_shiftsub1_cost (speed, mode, 0, cost);
190 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
191 for (m = 1; m < n; m++)
193 XEXP (all->shift, 1) = all->cint[m];
194 XEXP (all->shift_mult, 1) = all->pow2[m];
196 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
197 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
198 speed));
199 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
200 speed));
201 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
202 speed));
205 if (SCALAR_INT_MODE_P (mode))
207 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
208 mode_from = (machine_mode)(mode_from + 1))
209 init_expmed_one_conv (all, mode, mode_from, speed);
211 if (GET_MODE_CLASS (mode) == MODE_INT)
213 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
214 if (wider_mode != VOIDmode)
216 PUT_MODE (all->zext, wider_mode);
217 PUT_MODE (all->wide_mult, wider_mode);
218 PUT_MODE (all->wide_lshr, wider_mode);
219 XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
221 set_mul_widen_cost (speed, wider_mode,
222 set_src_cost (all->wide_mult, wider_mode, speed));
223 set_mul_highpart_cost (speed, mode,
224 set_src_cost (all->wide_trunc, mode, speed));
229 void
230 init_expmed (void)
232 struct init_expmed_rtl all;
233 machine_mode mode = QImode;
234 int m, speed;
236 memset (&all, 0, sizeof all);
237 for (m = 1; m < MAX_BITS_PER_WORD; m++)
239 all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
240 all.cint[m] = GEN_INT (m);
243 /* Avoid using hard regs in ways which may be unsupported. */
244 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
245 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
246 all.neg = gen_rtx_NEG (mode, all.reg);
247 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
248 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
249 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
250 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
251 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
252 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
253 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
254 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
255 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
256 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
257 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
258 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
259 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
260 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
261 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
263 for (speed = 0; speed < 2; speed++)
265 crtl->maybe_hot_insn_p = speed;
266 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
268 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
269 mode = (machine_mode)(mode + 1))
270 init_expmed_one_mode (&all, mode, speed);
272 if (MIN_MODE_PARTIAL_INT != VOIDmode)
273 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
274 mode = (machine_mode)(mode + 1))
275 init_expmed_one_mode (&all, mode, speed);
277 if (MIN_MODE_VECTOR_INT != VOIDmode)
278 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
279 mode = (machine_mode)(mode + 1))
280 init_expmed_one_mode (&all, mode, speed);
283 if (alg_hash_used_p ())
285 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
286 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
288 else
289 set_alg_hash_used_p (true);
290 default_rtl_profile ();
292 ggc_free (all.trunc);
293 ggc_free (all.shift_sub1);
294 ggc_free (all.shift_sub0);
295 ggc_free (all.shift_add);
296 ggc_free (all.shift_mult);
297 ggc_free (all.shift);
298 ggc_free (all.wide_trunc);
299 ggc_free (all.wide_lshr);
300 ggc_free (all.wide_mult);
301 ggc_free (all.zext);
302 ggc_free (all.smod_32);
303 ggc_free (all.sdiv_32);
304 ggc_free (all.udiv);
305 ggc_free (all.sdiv);
306 ggc_free (all.mult);
307 ggc_free (all.neg);
308 ggc_free (all.plus);
309 ggc_free (all.reg);
312 /* Return an rtx representing minus the value of X.
313 MODE is the intended mode of the result,
314 useful if X is a CONST_INT. */
316 rtx
317 negate_rtx (machine_mode mode, rtx x)
319 rtx result = simplify_unary_operation (NEG, mode, x, mode);
321 if (result == 0)
322 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
324 return result;
327 /* Whether reverse storage order is supported on the target. */
328 static int reverse_storage_order_supported = -1;
330 /* Check whether reverse storage order is supported on the target. */
332 static void
333 check_reverse_storage_order_support (void)
335 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
337 reverse_storage_order_supported = 0;
338 sorry ("reverse scalar storage order");
340 else
341 reverse_storage_order_supported = 1;
344 /* Whether reverse FP storage order is supported on the target. */
345 static int reverse_float_storage_order_supported = -1;
347 /* Check whether reverse FP storage order is supported on the target. */
349 static void
350 check_reverse_float_storage_order_support (void)
352 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
354 reverse_float_storage_order_supported = 0;
355 sorry ("reverse floating-point scalar storage order");
357 else
358 reverse_float_storage_order_supported = 1;
361 /* Return an rtx representing value of X with reverse storage order.
362 MODE is the intended mode of the result,
363 useful if X is a CONST_INT. */
365 rtx
366 flip_storage_order (machine_mode mode, rtx x)
368 machine_mode int_mode;
369 rtx result;
371 if (mode == QImode)
372 return x;
374 if (COMPLEX_MODE_P (mode))
376 rtx real = read_complex_part (x, false);
377 rtx imag = read_complex_part (x, true);
379 real = flip_storage_order (GET_MODE_INNER (mode), real);
380 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
382 return gen_rtx_CONCAT (mode, real, imag);
385 if (__builtin_expect (reverse_storage_order_supported < 0, 0))
386 check_reverse_storage_order_support ();
388 if (SCALAR_INT_MODE_P (mode))
389 int_mode = mode;
390 else
392 if (FLOAT_MODE_P (mode)
393 && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
394 check_reverse_float_storage_order_support ();
396 int_mode = mode_for_size (GET_MODE_PRECISION (mode), MODE_INT, 0);
397 if (int_mode == BLKmode)
399 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
400 return x;
402 x = gen_lowpart (int_mode, x);
405 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
406 if (result == 0)
407 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
409 if (int_mode != mode)
410 result = gen_lowpart (mode, result);
412 return result;
415 /* Adjust bitfield memory MEM so that it points to the first unit of mode
416 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
417 If MODE is BLKmode, return a reference to every byte in the bitfield.
418 Set *NEW_BITNUM to the bit position of the field within the new memory. */
420 static rtx
421 narrow_bit_field_mem (rtx mem, machine_mode mode,
422 unsigned HOST_WIDE_INT bitsize,
423 unsigned HOST_WIDE_INT bitnum,
424 unsigned HOST_WIDE_INT *new_bitnum)
426 if (mode == BLKmode)
428 *new_bitnum = bitnum % BITS_PER_UNIT;
429 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
430 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
431 / BITS_PER_UNIT);
432 return adjust_bitfield_address_size (mem, mode, offset, size);
434 else
436 unsigned int unit = GET_MODE_BITSIZE (mode);
437 *new_bitnum = bitnum % unit;
438 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
439 return adjust_bitfield_address (mem, mode, offset);
443 /* The caller wants to perform insertion or extraction PATTERN on a
444 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
445 BITREGION_START and BITREGION_END are as for store_bit_field
446 and FIELDMODE is the natural mode of the field.
448 Search for a mode that is compatible with the memory access
449 restrictions and (where applicable) with a register insertion or
450 extraction. Return the new memory on success, storing the adjusted
451 bit position in *NEW_BITNUM. Return null otherwise. */
453 static rtx
454 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
455 rtx op0, HOST_WIDE_INT bitsize,
456 HOST_WIDE_INT bitnum,
457 unsigned HOST_WIDE_INT bitregion_start,
458 unsigned HOST_WIDE_INT bitregion_end,
459 machine_mode fieldmode,
460 unsigned HOST_WIDE_INT *new_bitnum)
462 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
463 bitregion_end, MEM_ALIGN (op0),
464 MEM_VOLATILE_P (op0));
465 machine_mode best_mode;
466 if (iter.next_mode (&best_mode))
468 /* We can use a memory in BEST_MODE. See whether this is true for
469 any wider modes. All other things being equal, we prefer to
470 use the widest mode possible because it tends to expose more
471 CSE opportunities. */
472 if (!iter.prefer_smaller_modes ())
474 /* Limit the search to the mode required by the corresponding
475 register insertion or extraction instruction, if any. */
476 machine_mode limit_mode = word_mode;
477 extraction_insn insn;
478 if (get_best_reg_extraction_insn (&insn, pattern,
479 GET_MODE_BITSIZE (best_mode),
480 fieldmode))
481 limit_mode = insn.field_mode;
483 machine_mode wider_mode;
484 while (iter.next_mode (&wider_mode)
485 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
486 best_mode = wider_mode;
488 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
489 new_bitnum);
491 return NULL_RTX;
494 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
495 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
496 offset is then BITNUM / BITS_PER_UNIT. */
498 static bool
499 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
500 unsigned HOST_WIDE_INT bitsize,
501 machine_mode struct_mode)
503 if (BYTES_BIG_ENDIAN)
504 return (bitnum % BITS_PER_UNIT == 0
505 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
506 || (bitnum + bitsize) % BITS_PER_WORD == 0));
507 else
508 return bitnum % BITS_PER_WORD == 0;
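/* For example, on a little-endian target with 32-bit words,
   lowpart_bit_field_p is true for a field starting at bit 32 of a DImode
   structure (subreg byte offset 4) but false for one starting at bit 8.  */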
511 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
512 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
513 Return false if the access would touch memory outside the range
514 BITREGION_START to BITREGION_END for conformance to the C++ memory
515 model. */
517 static bool
518 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
519 unsigned HOST_WIDE_INT bitnum,
520 machine_mode fieldmode,
521 unsigned HOST_WIDE_INT bitregion_start,
522 unsigned HOST_WIDE_INT bitregion_end)
524 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
526 /* -fstrict-volatile-bitfields must be enabled and we must have a
527 volatile MEM. */
528 if (!MEM_P (op0)
529 || !MEM_VOLATILE_P (op0)
530 || flag_strict_volatile_bitfields <= 0)
531 return false;
533 /* Non-integral modes likely only happen with packed structures.
534 Punt. */
535 if (!SCALAR_INT_MODE_P (fieldmode))
536 return false;
538 /* The bit size must not be larger than the field mode, and
539 the field mode must not be larger than a word. */
540 if (bitsize > modesize || modesize > BITS_PER_WORD)
541 return false;
543 /* Check for cases of unaligned fields that must be split. */
544 if (bitnum % modesize + bitsize > modesize)
545 return false;
547 /* The memory must be sufficiently aligned for a MODESIZE access.
548 This condition guarantees that the memory access will not
549 touch anything after the end of the structure. */
550 if (MEM_ALIGN (op0) < modesize)
551 return false;
553 /* Check for cases where the C++ memory model applies. */
554 if (bitregion_end != 0
555 && (bitnum - bitnum % modesize < bitregion_start
556 || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
557 return false;
559 return true;
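/* For example, with -fstrict-volatile-bitfields enabled, an 8-bit field
   declared with SImode field mode inside a volatile structure aligned to
   at least 32 bits passes this test (provided it stays within the bit
   region), and the callers below then access it with a full SImode load
   or store.  */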
562 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
563 bit number BITNUM can be treated as a simple value of mode MODE. */
565 static bool
566 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
567 unsigned HOST_WIDE_INT bitnum, machine_mode mode)
569 return (MEM_P (op0)
570 && bitnum % BITS_PER_UNIT == 0
571 && bitsize == GET_MODE_BITSIZE (mode)
572 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
573 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
574 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
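/* For example, a 32-bit field starting on a byte boundary and accessed
   in SImode is "simple" whenever it is 32-bit aligned within sufficiently
   aligned memory, or unconditionally on targets where unaligned SImode
   accesses are not slow.  */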
577 /* Try to use instruction INSV to store VALUE into a field of OP0.
578 BITSIZE and BITNUM are as for store_bit_field. */
580 static bool
581 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
582 unsigned HOST_WIDE_INT bitsize,
583 unsigned HOST_WIDE_INT bitnum,
584 rtx value)
586 struct expand_operand ops[4];
587 rtx value1;
588 rtx xop0 = op0;
589 rtx_insn *last = get_last_insn ();
590 bool copy_back = false;
592 machine_mode op_mode = insv->field_mode;
593 unsigned int unit = GET_MODE_BITSIZE (op_mode);
594 if (bitsize == 0 || bitsize > unit)
595 return false;
597 if (MEM_P (xop0))
598 /* Get a reference to the first byte of the field. */
599 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
600 &bitnum);
601 else
603 /* Convert from counting within OP0 to counting in OP_MODE. */
604 if (BYTES_BIG_ENDIAN)
605 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
607 /* If xop0 is a register, we need it in OP_MODE
608 to make it acceptable to the format of insv. */
609 if (GET_CODE (xop0) == SUBREG)
610 /* We can't just change the mode, because this might clobber op0,
611 and we will need the original value of op0 if insv fails. */
612 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
613 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
614 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
617 /* If the destination is a paradoxical subreg such that we need a
618 truncate to the inner mode, perform the insertion on a temporary and
619 truncate the result to the original destination. Note that we can't
620 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
621 X) 0)) is (reg:N X). */
622 if (GET_CODE (xop0) == SUBREG
623 && REG_P (SUBREG_REG (xop0))
624 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
625 op_mode))
627 rtx tem = gen_reg_rtx (op_mode);
628 emit_move_insn (tem, xop0);
629 xop0 = tem;
630 copy_back = true;
633 /* There is a similar overflow check at the start of store_bit_field_1,
634 but it only handles the case where the field lies completely outside
635 the register.  Here the field may lie only partly in the register, so
636 we need to adjust BITSIZE for that partial overflow.  Without this
637 fix, pr48335-2.c would break on big-endian targets that have a bit
638 insert instruction, such as arm and aarch64.  */
640 if (bitsize + bitnum > unit && bitnum < unit)
642 warning (OPT_Wextra, "write of %wu-bit data outside the bounds of "
643 "the destination object; data truncated to %wu bits",
644 bitsize, unit - bitnum);
645 bitsize = unit - bitnum;
648 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
649 "backwards" from the size of the unit we are inserting into.
650 Otherwise, we count bits from the most significant on a
651 BYTES/BITS_BIG_ENDIAN machine. */
653 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
654 bitnum = unit - bitsize - bitnum;
656 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
657 value1 = value;
658 if (GET_MODE (value) != op_mode)
660 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
662 rtx tmp;
663 /* Optimization: Don't bother really extending VALUE
664 if it has all the bits we will actually use. However,
665 if we must narrow it, be sure we do it correctly. */
667 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
669 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
670 if (! tmp)
671 tmp = simplify_gen_subreg (op_mode,
672 force_reg (GET_MODE (value),
673 value1),
674 GET_MODE (value), 0);
676 else
678 tmp = gen_lowpart_if_possible (op_mode, value1);
679 if (! tmp)
680 tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value),
681 value1));
683 value1 = tmp;
685 else if (CONST_INT_P (value))
686 value1 = gen_int_mode (INTVAL (value), op_mode);
687 else
688 /* Parse phase is supposed to make VALUE's data type
689 match that of the component reference, which is a type
690 at least as wide as the field; so VALUE should have
691 a mode that corresponds to that type. */
692 gcc_assert (CONSTANT_P (value));
695 create_fixed_operand (&ops[0], xop0);
696 create_integer_operand (&ops[1], bitsize);
697 create_integer_operand (&ops[2], bitnum);
698 create_input_operand (&ops[3], value1, op_mode);
699 if (maybe_expand_insn (insv->icode, 4, ops))
701 if (copy_back)
702 convert_move (op0, xop0, true);
703 return true;
705 delete_insns_since (last);
706 return false;
709 /* A subroutine of store_bit_field, with the same arguments. Return true
710 if the operation could be implemented.
712 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
713 no other way of implementing the operation. If FALLBACK_P is false,
714 return false instead. */
716 static bool
717 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
718 unsigned HOST_WIDE_INT bitnum,
719 unsigned HOST_WIDE_INT bitregion_start,
720 unsigned HOST_WIDE_INT bitregion_end,
721 machine_mode fieldmode,
722 rtx value, bool reverse, bool fallback_p)
724 rtx op0 = str_rtx;
725 rtx orig_value;
727 while (GET_CODE (op0) == SUBREG)
729 /* The following line once was done only if WORDS_BIG_ENDIAN,
730 but I think that is a mistake. WORDS_BIG_ENDIAN is
731 meaningful at a much higher level; when structures are copied
732 between memory and regs, the higher-numbered regs
733 always get higher addresses. */
734 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
735 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
736 int byte_offset = 0;
738 /* Paradoxical subregs need special handling on big-endian machines. */
739 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
741 int difference = inner_mode_size - outer_mode_size;
743 if (WORDS_BIG_ENDIAN)
744 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
745 if (BYTES_BIG_ENDIAN)
746 byte_offset += difference % UNITS_PER_WORD;
748 else
749 byte_offset = SUBREG_BYTE (op0);
751 bitnum += byte_offset * BITS_PER_UNIT;
752 op0 = SUBREG_REG (op0);
755 /* No action is needed if the target is a register and if the field
756 lies completely outside that register. This can occur if the source
757 code contains an out-of-bounds access to a small array. */
758 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
759 return true;
761 /* Use vec_set patterns for inserting parts of vectors whenever
762 available. */
763 if (VECTOR_MODE_P (GET_MODE (op0))
764 && !MEM_P (op0)
765 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
766 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
767 && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
768 && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
770 struct expand_operand ops[3];
771 machine_mode outermode = GET_MODE (op0);
772 machine_mode innermode = GET_MODE_INNER (outermode);
773 enum insn_code icode = optab_handler (vec_set_optab, outermode);
774 int pos = bitnum / GET_MODE_BITSIZE (innermode);
776 create_fixed_operand (&ops[0], op0);
777 create_input_operand (&ops[1], value, innermode);
778 create_integer_operand (&ops[2], pos);
779 if (maybe_expand_insn (icode, 3, ops))
780 return true;
783 /* If the target is a register, overwriting the entire object, or storing
784 a full-word or multi-word field can be done with just a SUBREG. */
785 if (!MEM_P (op0)
786 && bitsize == GET_MODE_BITSIZE (fieldmode)
787 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
788 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
790 /* Use the subreg machinery either to narrow OP0 to the required
791 words or to cope with mode punning between equal-sized modes.
792 In the latter case, use subreg on the rhs side, not lhs. */
793 rtx sub;
795 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
797 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
798 if (sub)
800 if (reverse)
801 sub = flip_storage_order (GET_MODE (op0), sub);
802 emit_move_insn (op0, sub);
803 return true;
806 else
808 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
809 bitnum / BITS_PER_UNIT);
810 if (sub)
812 if (reverse)
813 value = flip_storage_order (fieldmode, value);
814 emit_move_insn (sub, value);
815 return true;
820 /* If the target is memory, storing any naturally aligned field can be
821 done with a simple store. For targets that support fast unaligned
822 memory, any naturally sized, unit aligned field can be done directly. */
823 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
825 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
826 if (reverse)
827 value = flip_storage_order (fieldmode, value);
828 emit_move_insn (op0, value);
829 return true;
832 /* Make sure we are playing with integral modes. Pun with subregs
833 if we aren't. This must come after the entire register case above,
834 since that case is valid for any mode. The following cases are only
835 valid for integral modes. */
837 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
838 if (imode != GET_MODE (op0))
840 if (MEM_P (op0))
841 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
842 else
844 gcc_assert (imode != BLKmode);
845 op0 = gen_lowpart (imode, op0);
850 /* Storing an lsb-aligned field in a register
851 can be done with a movstrict instruction. */
853 if (!MEM_P (op0)
854 && !reverse
855 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
856 && bitsize == GET_MODE_BITSIZE (fieldmode)
857 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
859 struct expand_operand ops[2];
860 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
861 rtx arg0 = op0;
862 unsigned HOST_WIDE_INT subreg_off;
864 if (GET_CODE (arg0) == SUBREG)
866 /* Else we've got some float mode source being extracted into
867 a different float mode destination -- this combination of
868 subregs results in Severe Tire Damage. */
869 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
870 || GET_MODE_CLASS (fieldmode) == MODE_INT
871 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
872 arg0 = SUBREG_REG (arg0);
875 subreg_off = bitnum / BITS_PER_UNIT;
876 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
878 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
880 create_fixed_operand (&ops[0], arg0);
881 /* Shrink the source operand to FIELDMODE. */
882 create_convert_operand_to (&ops[1], value, fieldmode, false);
883 if (maybe_expand_insn (icode, 2, ops))
884 return true;
888 /* Handle fields bigger than a word. */
890 if (bitsize > BITS_PER_WORD)
892 /* Here we transfer the words of the field
893 in the order least significant first.
894 This is because the most significant word is the one which may
895 be less than full.
896 However, only do that if the value is not BLKmode. */
898 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
899 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
900 unsigned int i;
901 rtx_insn *last;
903 /* This is the mode we must force value to, so that there will be enough
904 subwords to extract. Note that fieldmode will often (always?) be
905 VOIDmode, because that is what store_field uses to indicate that this
906 is a bit field, but passing VOIDmode to operand_subword_force
907 is not allowed. */
908 fieldmode = GET_MODE (value);
909 if (fieldmode == VOIDmode)
910 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
912 last = get_last_insn ();
913 for (i = 0; i < nwords; i++)
915 /* If I is 0, use the low-order word in both field and target;
916 if I is 1, use the next to lowest word; and so on. */
917 unsigned int wordnum = (backwards
918 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
919 - i - 1
920 : i);
921 unsigned int bit_offset = (backwards ^ reverse
922 ? MAX ((int) bitsize - ((int) i + 1)
923 * BITS_PER_WORD,
924 0)
925 : (int) i * BITS_PER_WORD);
926 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
927 unsigned HOST_WIDE_INT new_bitsize =
928 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
930 /* If the remaining chunk doesn't have full wordsize we have
931 to make sure that for big-endian machines the higher order
932 bits are used. */
933 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
934 value_word = simplify_expand_binop (word_mode, lshr_optab,
935 value_word,
936 GEN_INT (BITS_PER_WORD
937 - new_bitsize),
938 NULL_RTX, true,
939 OPTAB_LIB_WIDEN);
941 if (!store_bit_field_1 (op0, new_bitsize,
942 bitnum + bit_offset,
943 bitregion_start, bitregion_end,
944 word_mode,
945 value_word, reverse, fallback_p))
947 delete_insns_since (last);
948 return false;
951 return true;
954 /* If VALUE has a floating-point or complex mode, access it as an
955 integer of the corresponding size. This can occur on a machine
956 with 64 bit registers that uses SFmode for float. It can also
957 occur for unaligned float or complex fields. */
958 orig_value = value;
959 if (GET_MODE (value) != VOIDmode
960 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
961 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
963 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
964 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
967 /* If OP0 is a multi-word register, narrow it to the affected word.
968 If the region spans two words, defer to store_split_bit_field.
969 Don't do this if op0 is a single hard register wider than word
970 such as a float or vector register. */
971 if (!MEM_P (op0)
972 && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD
973 && (!REG_P (op0)
974 || !HARD_REGISTER_P (op0)
975 || HARD_REGNO_NREGS (REGNO (op0), GET_MODE (op0)) != 1))
977 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
979 if (!fallback_p)
980 return false;
982 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
983 bitregion_end, value, reverse);
984 return true;
986 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
987 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
988 gcc_assert (op0);
989 bitnum %= BITS_PER_WORD;
992 /* From here on we can assume that the field to be stored in fits
993 within a word. If the destination is a register, it too fits
994 in a word. */
996 extraction_insn insv;
997 if (!MEM_P (op0)
998 && !reverse
999 && get_best_reg_extraction_insn (&insv, EP_insv,
1000 GET_MODE_BITSIZE (GET_MODE (op0)),
1001 fieldmode)
1002 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1003 return true;
1005 /* If OP0 is a memory, try copying it to a register and seeing if a
1006 cheap register alternative is available. */
1007 if (MEM_P (op0) && !reverse)
1009 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1010 fieldmode)
1011 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1012 return true;
1014 rtx_insn *last = get_last_insn ();
1016 /* Try loading part of OP0 into a register, inserting the bitfield
1017 into that, and then copying the result back to OP0. */
1018 unsigned HOST_WIDE_INT bitpos;
1019 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1020 bitregion_start, bitregion_end,
1021 fieldmode, &bitpos);
1022 if (xop0)
1024 rtx tempreg = copy_to_reg (xop0);
1025 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1026 bitregion_start, bitregion_end,
1027 fieldmode, orig_value, reverse, false))
1029 emit_move_insn (xop0, tempreg);
1030 return true;
1032 delete_insns_since (last);
1036 if (!fallback_p)
1037 return false;
1039 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
1040 bitregion_end, value, reverse);
1041 return true;
1044 /* Generate code to store value from rtx VALUE
1045 into a bit-field within structure STR_RTX
1046 containing BITSIZE bits starting at bit BITNUM.
1048 BITREGION_START is bitpos of the first bitfield in this region.
1049 BITREGION_END is the bitpos of the ending bitfield in this region.
1050 These two fields are 0, if the C++ memory model does not apply,
1051 or we are not interested in keeping track of bitfield regions.
1053 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1055 If REVERSE is true, the store is to be done in reverse order. */
1057 void
1058 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1059 unsigned HOST_WIDE_INT bitnum,
1060 unsigned HOST_WIDE_INT bitregion_start,
1061 unsigned HOST_WIDE_INT bitregion_end,
1062 machine_mode fieldmode,
1063 rtx value, bool reverse)
1065 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1066 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
1067 bitregion_start, bitregion_end))
1069 /* Storing of a full word can be done with a simple store.
1070 We know here that the field can be accessed with one single
1071 instruction. For targets that support unaligned memory,
1072 an unaligned access may be necessary. */
1073 if (bitsize == GET_MODE_BITSIZE (fieldmode))
1075 str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
1076 bitnum / BITS_PER_UNIT);
1077 if (reverse)
1078 value = flip_storage_order (fieldmode, value);
1079 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1080 emit_move_insn (str_rtx, value);
1082 else
1084 rtx temp;
1086 str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
1087 &bitnum);
1088 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
1089 temp = copy_to_reg (str_rtx);
1090 if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1091 fieldmode, value, reverse, true))
1092 gcc_unreachable ();
1094 emit_move_insn (str_rtx, temp);
1097 return;
1100 /* Under the C++11 memory model, we must not touch bits outside the
1101 bit region. Adjust the address to start at the beginning of the
1102 bit region. */
1103 if (MEM_P (str_rtx) && bitregion_start > 0)
1105 machine_mode bestmode;
1106 HOST_WIDE_INT offset, size;
1108 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1110 offset = bitregion_start / BITS_PER_UNIT;
1111 bitnum -= bitregion_start;
1112 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1113 bitregion_end -= bitregion_start;
1114 bitregion_start = 0;
1115 bestmode = get_best_mode (bitsize, bitnum,
1116 bitregion_start, bitregion_end,
1117 MEM_ALIGN (str_rtx), VOIDmode,
1118 MEM_VOLATILE_P (str_rtx));
1119 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1122 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1123 bitregion_start, bitregion_end,
1124 fieldmode, value, reverse, true))
1125 gcc_unreachable ();
1128 /* Use shifts and boolean operations to store VALUE into a bit field of
1129 width BITSIZE in OP0, starting at bit BITNUM.
1131 If REVERSE is true, the store is to be done in reverse order. */
1133 static void
1134 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1135 unsigned HOST_WIDE_INT bitnum,
1136 unsigned HOST_WIDE_INT bitregion_start,
1137 unsigned HOST_WIDE_INT bitregion_end,
1138 rtx value, bool reverse)
1140 /* There is a case not handled here:
1141 a structure with a known alignment of just a halfword
1142 and a field split across two aligned halfwords within the structure.
1143 Or likewise a structure with a known alignment of just a byte
1144 and a field split across two bytes.
1145 Such cases are not supposed to be able to occur. */
1147 if (MEM_P (op0))
1149 machine_mode mode = GET_MODE (op0);
1150 if (GET_MODE_BITSIZE (mode) == 0
1151 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1152 mode = word_mode;
1153 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1154 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1156 if (mode == VOIDmode)
1158 /* The only way this should occur is if the field spans word
1159 boundaries. */
1160 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1161 bitregion_end, value, reverse);
1162 return;
1165 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1168 store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
1171 /* Helper function for store_fixed_bit_field, stores
1172 the bit field always using the MODE of OP0. */
1174 static void
1175 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1176 unsigned HOST_WIDE_INT bitnum,
1177 rtx value, bool reverse)
1179 machine_mode mode;
1180 rtx temp;
1181 int all_zero = 0;
1182 int all_one = 0;
1184 mode = GET_MODE (op0);
1185 gcc_assert (SCALAR_INT_MODE_P (mode));
1187 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1188 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1190 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1191 /* BITNUM is the distance between our msb
1192 and that of the containing datum.
1193 Convert it to the distance from the lsb. */
1194 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1196 /* Now BITNUM is always the distance between our lsb
1197 and that of OP0. */
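/* For example, in a 32-bit MODE, an 8-bit field whose msb-relative BITNUM
   was 4 is now at lsb-relative bit 20 (32 - 8 - 4).  */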
1199 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1200 we must first convert its mode to MODE. */
1202 if (CONST_INT_P (value))
1204 unsigned HOST_WIDE_INT v = UINTVAL (value);
1206 if (bitsize < HOST_BITS_PER_WIDE_INT)
1207 v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1209 if (v == 0)
1210 all_zero = 1;
1211 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1212 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1213 || (bitsize == HOST_BITS_PER_WIDE_INT
1214 && v == HOST_WIDE_INT_M1U))
1215 all_one = 1;
1217 value = lshift_value (mode, v, bitnum);
1219 else
1221 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1222 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1224 if (GET_MODE (value) != mode)
1225 value = convert_to_mode (mode, value, 1);
1227 if (must_and)
1228 value = expand_binop (mode, and_optab, value,
1229 mask_rtx (mode, 0, bitsize, 0),
1230 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1231 if (bitnum > 0)
1232 value = expand_shift (LSHIFT_EXPR, mode, value,
1233 bitnum, NULL_RTX, 1);
1236 if (reverse)
1237 value = flip_storage_order (mode, value);
1239 /* Now clear the chosen bits in OP0,
1240 except that if VALUE is -1 we need not bother. */
1241 /* We keep the intermediates in registers to allow CSE to combine
1242 consecutive bitfield assignments. */
1244 temp = force_reg (mode, op0);
1246 if (! all_one)
1248 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1249 if (reverse)
1250 mask = flip_storage_order (mode, mask);
1251 temp = expand_binop (mode, and_optab, temp, mask,
1252 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1253 temp = force_reg (mode, temp);
1256 /* Now logical-or VALUE into OP0, unless it is zero. */
1258 if (! all_zero)
1260 temp = expand_binop (mode, ior_optab, temp, value,
1261 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1262 temp = force_reg (mode, temp);
1265 if (op0 != temp)
1267 op0 = copy_rtx (op0);
1268 emit_move_insn (op0, temp);
1272 /* Store a bit field that is split across multiple accessible memory objects.
1274 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1275 BITSIZE is the field width; BITPOS the position of its first bit
1276 (within the word).
1277 VALUE is the value to store.
1279 If REVERSE is true, the store is to be done in reverse order.
1281 This does not yet handle fields wider than BITS_PER_WORD. */
1283 static void
1284 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1285 unsigned HOST_WIDE_INT bitpos,
1286 unsigned HOST_WIDE_INT bitregion_start,
1287 unsigned HOST_WIDE_INT bitregion_end,
1288 rtx value, bool reverse)
1290 unsigned int unit, total_bits, bitsdone = 0;
1292 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1293 much at a time. */
1294 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1295 unit = BITS_PER_WORD;
1296 else
1297 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1299 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1300 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1301 again, and we will mutually recurse forever. */
1302 if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1303 unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1305 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1306 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1307 that VALUE might be a floating-point constant. */
1308 if (CONSTANT_P (value) && !CONST_INT_P (value))
1310 rtx word = gen_lowpart_common (word_mode, value);
1312 if (word && (value != word))
1313 value = word;
1314 else
1315 value = gen_lowpart_common (word_mode,
1316 force_reg (GET_MODE (value) != VOIDmode
1317 ? GET_MODE (value)
1318 : word_mode, value));
1321 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1323 while (bitsdone < bitsize)
1325 unsigned HOST_WIDE_INT thissize;
1326 unsigned HOST_WIDE_INT thispos;
1327 unsigned HOST_WIDE_INT offset;
1328 rtx part, word;
1330 offset = (bitpos + bitsdone) / unit;
1331 thispos = (bitpos + bitsdone) % unit;
1333 /* When region of bytes we can touch is restricted, decrease
1334 UNIT close to the end of the region as needed. If op0 is a REG
1335 or SUBREG of REG, don't do this, as there can't be data races
1336 on a register and we can expand shorter code in some cases. */
1337 if (bitregion_end
1338 && unit > BITS_PER_UNIT
1339 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1340 && !REG_P (op0)
1341 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1343 unit = unit / 2;
1344 continue;
1347 /* THISSIZE must not overrun a word boundary. Otherwise,
1348 store_fixed_bit_field will call us again, and we will mutually
1349 recurse forever. */
1350 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1351 thissize = MIN (thissize, unit - thispos);
1353 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1355 /* Fetch successively less significant portions. */
1356 if (CONST_INT_P (value))
1357 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1358 >> (bitsize - bitsdone - thissize))
1359 & ((HOST_WIDE_INT_1 << thissize) - 1));
1360 /* Likewise, but the source is little-endian. */
1361 else if (reverse)
1362 part = extract_fixed_bit_field (word_mode, value, thissize,
1363 bitsize - bitsdone - thissize,
1364 NULL_RTX, 1, false);
1365 else
1367 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1368 /* The args are chosen so that the last part includes the
1369 lsb. Give extract_bit_field the value it needs (with
1370 endianness compensation) to fetch the piece we want. */
1371 part = extract_fixed_bit_field (word_mode, value, thissize,
1372 total_bits - bitsize + bitsdone,
1373 NULL_RTX, 1, false);
1376 else
1378 /* Fetch successively more significant portions. */
1379 if (CONST_INT_P (value))
1380 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1381 >> bitsdone)
1382 & ((HOST_WIDE_INT_1 << thissize) - 1));
1383 /* Likewise, but the source is big-endian. */
1384 else if (reverse)
1385 part = extract_fixed_bit_field (word_mode, value, thissize,
1386 total_bits - bitsdone - thissize,
1387 NULL_RTX, 1, false);
1388 else
1389 part = extract_fixed_bit_field (word_mode, value, thissize,
1390 bitsdone, NULL_RTX, 1, false);
1393 /* If OP0 is a register, then handle OFFSET here. */
1394 if (SUBREG_P (op0) || REG_P (op0))
1396 machine_mode op0_mode = GET_MODE (op0);
1397 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1398 word = offset ? const0_rtx : op0;
1399 else
1400 word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1401 GET_MODE (op0));
1402 offset &= BITS_PER_WORD / unit - 1;
1404 else
1405 word = op0;
1407 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1408 it is just an out-of-bounds access. Ignore it. */
1409 if (word != const0_rtx)
1410 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1411 bitregion_start, bitregion_end, part,
1412 reverse);
1413 bitsdone += thissize;
1417 /* A subroutine of extract_bit_field_1 that converts return value X
1418 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1419 to extract_bit_field. */
1421 static rtx
1422 convert_extracted_bit_field (rtx x, machine_mode mode,
1423 machine_mode tmode, bool unsignedp)
1425 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1426 return x;
1428 /* If TMODE is not a scalar integral mode, first convert to the
1429 integer mode of the same size and then access the result as a
1430 value of mode TMODE via a SUBREG.  */
1431 if (!SCALAR_INT_MODE_P (tmode))
1433 machine_mode smode;
1435 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1436 x = convert_to_mode (smode, x, unsignedp);
1437 x = force_reg (smode, x);
1438 return gen_lowpart (tmode, x);
1441 return convert_to_mode (tmode, x, unsignedp);
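/* For example, a value extracted for an SFmode TMODE is first converted
   to the integer mode of the same size (typically SImode), forced into a
   register, and then punned to SFmode with gen_lowpart.  */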
1444 /* Try to use an ext(z)v pattern to extract a field from OP0.
1445 Return the extracted value on success, otherwise return null.
1446 EXT_MODE is the mode of the extraction and the other arguments
1447 are as for extract_bit_field. */
1449 static rtx
1450 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1451 unsigned HOST_WIDE_INT bitsize,
1452 unsigned HOST_WIDE_INT bitnum,
1453 int unsignedp, rtx target,
1454 machine_mode mode, machine_mode tmode)
1456 struct expand_operand ops[4];
1457 rtx spec_target = target;
1458 rtx spec_target_subreg = 0;
1459 machine_mode ext_mode = extv->field_mode;
1460 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1462 if (bitsize == 0 || unit < bitsize)
1463 return NULL_RTX;
1465 if (MEM_P (op0))
1466 /* Get a reference to the first byte of the field. */
1467 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1468 &bitnum);
1469 else
1471 /* Convert from counting within OP0 to counting in EXT_MODE. */
1472 if (BYTES_BIG_ENDIAN)
1473 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1475 /* If op0 is a register, we need it in EXT_MODE to make it
1476 acceptable to the format of ext(z)v. */
1477 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1478 return NULL_RTX;
1479 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1480 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1483 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1484 "backwards" from the size of the unit we are extracting from.
1485 Otherwise, we count bits from the most significant on a
1486 BYTES/BITS_BIG_ENDIAN machine. */
1488 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1489 bitnum = unit - bitsize - bitnum;
1491 if (target == 0)
1492 target = spec_target = gen_reg_rtx (tmode);
1494 if (GET_MODE (target) != ext_mode)
1496 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1497 between the mode of the extraction (word_mode) and the target
1498 mode. Instead, create a temporary and use convert_move to set
1499 the target. */
1500 if (REG_P (target)
1501 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1503 target = gen_lowpart (ext_mode, target);
1504 if (GET_MODE_PRECISION (ext_mode)
1505 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1506 spec_target_subreg = target;
1508 else
1509 target = gen_reg_rtx (ext_mode);
1512 create_output_operand (&ops[0], target, ext_mode);
1513 create_fixed_operand (&ops[1], op0);
1514 create_integer_operand (&ops[2], bitsize);
1515 create_integer_operand (&ops[3], bitnum);
1516 if (maybe_expand_insn (extv->icode, 4, ops))
1518 target = ops[0].value;
1519 if (target == spec_target)
1520 return target;
1521 if (target == spec_target_subreg)
1522 return spec_target;
1523 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1525 return NULL_RTX;
1528 /* A subroutine of extract_bit_field, with the same arguments.
1529 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1530 if we can find no other means of implementing the operation.
1531 If FALLBACK_P is false, return NULL instead.  */
1533 static rtx
1534 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1535 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1536 machine_mode mode, machine_mode tmode,
1537 bool reverse, bool fallback_p, rtx *alt_rtl)
1539 rtx op0 = str_rtx;
1540 machine_mode int_mode;
1541 machine_mode mode1;
1543 if (tmode == VOIDmode)
1544 tmode = mode;
1546 while (GET_CODE (op0) == SUBREG)
1548 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1549 op0 = SUBREG_REG (op0);
1552 /* If we have an out-of-bounds access to a register, just return an
1553 uninitialized register of the required mode. This can occur if the
1554 source code contains an out-of-bounds access to a small array. */
1555 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1556 return gen_reg_rtx (tmode);
1558 if (REG_P (op0)
1559 && mode == GET_MODE (op0)
1560 && bitnum == 0
1561 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1563 if (reverse)
1564 op0 = flip_storage_order (mode, op0);
1565 /* We're trying to extract a full register from itself. */
1566 return op0;
1569 /* See if we can get a better vector mode before extracting. */
1570 if (VECTOR_MODE_P (GET_MODE (op0))
1571 && !MEM_P (op0)
1572 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1574 machine_mode new_mode;
1576 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1577 new_mode = MIN_MODE_VECTOR_FLOAT;
1578 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1579 new_mode = MIN_MODE_VECTOR_FRACT;
1580 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1581 new_mode = MIN_MODE_VECTOR_UFRACT;
1582 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1583 new_mode = MIN_MODE_VECTOR_ACCUM;
1584 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1585 new_mode = MIN_MODE_VECTOR_UACCUM;
1586 else
1587 new_mode = MIN_MODE_VECTOR_INT;
1589 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1590 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1591 && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode)
1592 && targetm.vector_mode_supported_p (new_mode))
1593 break;
1594 if (new_mode != VOIDmode)
1595 op0 = gen_lowpart (new_mode, op0);
1598 /* Use vec_extract patterns for extracting parts of vectors whenever
1599 available. */
1600 if (VECTOR_MODE_P (GET_MODE (op0))
1601 && !MEM_P (op0)
1602 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1603 && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1604 == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1606 struct expand_operand ops[3];
1607 machine_mode outermode = GET_MODE (op0);
1608 machine_mode innermode = GET_MODE_INNER (outermode);
1609 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1610 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1612 create_output_operand (&ops[0], target, innermode);
1613 ops[0].target = 1;
1614 create_input_operand (&ops[1], op0, outermode);
1615 create_integer_operand (&ops[2], pos);
1616 if (maybe_expand_insn (icode, 3, ops))
1618 if (alt_rtl && ops[0].target)
1619 *alt_rtl = target;
1620 target = ops[0].value;
1621 if (GET_MODE (target) != mode)
1622 return gen_lowpart (tmode, target);
1623 return target;
1627 /* Make sure we are playing with integral modes. Pun with subregs
1628 if we aren't. */
1630 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1631 if (imode != GET_MODE (op0))
1633 if (MEM_P (op0))
1634 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1635 else if (imode != BLKmode)
1637 op0 = gen_lowpart (imode, op0);
1639 /* If we got a SUBREG, force it into a register since we
1640 aren't going to be able to do another SUBREG on it. */
1641 if (GET_CODE (op0) == SUBREG)
1642 op0 = force_reg (imode, op0);
1644 else
1646 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1647 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1648 emit_move_insn (mem, op0);
1649 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1654 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1655 If that's wrong, the solution is to test for it and set TARGET to 0
1656 if needed. */
1658 /* Get the mode of the field to use for atomic access or subreg
1659 conversion. */
1660 mode1 = mode;
1661 if (SCALAR_INT_MODE_P (tmode))
1663 machine_mode try_mode = mode_for_size (bitsize,
1664 GET_MODE_CLASS (tmode), 0);
1665 if (try_mode != BLKmode)
1666 mode1 = try_mode;
1668 gcc_assert (mode1 != BLKmode);
1670 /* Extraction of a full MODE1 value can be done with a subreg as long
1671 as the least significant bit of the value is the least significant
1672 bit of either OP0 or a word of OP0. */
1673 if (!MEM_P (op0)
1674 && !reverse
1675 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1676 && bitsize == GET_MODE_BITSIZE (mode1)
1677 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1679 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1680 bitnum / BITS_PER_UNIT);
1681 if (sub)
1682 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1685 /* Extraction of a full MODE1 value can be done with a load as long as
1686 the field is on a byte boundary and is sufficiently aligned. */
1687 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1689 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1690 if (reverse)
1691 op0 = flip_storage_order (mode1, op0);
1692 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1695 /* Handle fields bigger than a word. */
1697 if (bitsize > BITS_PER_WORD)
1699 /* Here we transfer the words of the field
1700 in the order least significant first.
1701 This is because the most significant word is the one which may
1702 be less than full. */
1704 const bool backwards = WORDS_BIG_ENDIAN;
1705 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1706 unsigned int i;
1707 rtx_insn *last;
1709 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1710 target = gen_reg_rtx (mode);
1712 /* In case we're about to clobber a base register or something
1713 (see gcc.c-torture/execute/20040625-1.c). */
1714 if (reg_mentioned_p (target, str_rtx))
1715 target = gen_reg_rtx (mode);
1717 /* Indicate for flow that the entire target reg is being set. */
1718 emit_clobber (target);
1720 last = get_last_insn ();
1721 for (i = 0; i < nwords; i++)
1723 /* If I is 0, use the low-order word in both field and target;
1724 if I is 1, use the next to lowest word; and so on. */
1725 /* Word number in TARGET to use. */
1726 unsigned int wordnum
1727 = (backwards
1728 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1729 : i);
1730 /* Offset from start of field in OP0. */
1731 unsigned int bit_offset = (backwards ^ reverse
1732 ? MAX ((int) bitsize - ((int) i + 1)
1733 * BITS_PER_WORD,
1734 0)
1735 : (int) i * BITS_PER_WORD);
1736 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1737 rtx result_part
1738 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1739 bitsize - i * BITS_PER_WORD),
1740 bitnum + bit_offset, 1, target_part,
1741 mode, word_mode, reverse, fallback_p, NULL);
1743 gcc_assert (target_part);
1744 if (!result_part)
1746 delete_insns_since (last);
1747 return NULL;
1750 if (result_part != target_part)
1751 emit_move_insn (target_part, result_part);
1754 if (unsignedp)
1756 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1757 need to be zero'd out. */
1758 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1760 unsigned int i, total_words;
1762 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1763 for (i = nwords; i < total_words; i++)
1764 emit_move_insn
1765 (operand_subword (target,
1766 backwards ? total_words - i - 1 : i,
1767 1, VOIDmode),
1768 const0_rtx);
1770 return target;
1773 /* Signed bit field: sign-extend with two arithmetic shifts. */
1774 target = expand_shift (LSHIFT_EXPR, mode, target,
1775 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1776 return expand_shift (RSHIFT_EXPR, mode, target,
1777 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1780 /* If OP0 is a multi-word register, narrow it to the affected word.
1781 If the region spans two words, defer to extract_split_bit_field. */
1782 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1784 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1786 if (!fallback_p)
1787 return NULL_RTX;
1788 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1789 reverse);
1790 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1792 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1793 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1794 bitnum %= BITS_PER_WORD;
1797 /* From here on we know the desired field is smaller than a word.
1798 If OP0 is a register, it too fits within a word. */
1799 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1800 extraction_insn extv;
1801 if (!MEM_P (op0)
1802 && !reverse
1803 /* ??? We could limit the structure size to the part of OP0 that
1804 contains the field, with appropriate checks for endianness
1805 and TRULY_NOOP_TRUNCATION. */
1806 && get_best_reg_extraction_insn (&extv, pattern,
1807 GET_MODE_BITSIZE (GET_MODE (op0)),
1808 tmode))
1810 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1811 unsignedp, target, mode,
1812 tmode);
1813 if (result)
1814 return result;
1817 /* If OP0 is a memory, try copying it to a register and seeing if a
1818 cheap register alternative is available. */
1819 if (MEM_P (op0) && !reverse)
1821 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1822 tmode))
1824 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1825 bitnum, unsignedp,
1826 target, mode,
1827 tmode);
1828 if (result)
1829 return result;
1832 rtx_insn *last = get_last_insn ();
1834 /* Try loading part of OP0 into a register and extracting the
1835 bitfield from that. */
1836 unsigned HOST_WIDE_INT bitpos;
1837 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1838 0, 0, tmode, &bitpos);
1839 if (xop0)
1841 xop0 = copy_to_reg (xop0);
1842 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1843 unsignedp, target,
1844 mode, tmode, reverse, false, NULL);
1845 if (result)
1846 return result;
1847 delete_insns_since (last);
1851 if (!fallback_p)
1852 return NULL;
1854 /* Find a correspondingly-sized integer field, so we can apply
1855 shifts and masks to it. */
1856 int_mode = int_mode_for_mode (tmode);
1857 if (int_mode == BLKmode)
1858 int_mode = int_mode_for_mode (mode);
1859 /* Should probably push op0 out to memory and then do a load. */
1860 gcc_assert (int_mode != BLKmode);
1862 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target,
1863 unsignedp, reverse);
1865 /* Complex values must be reversed piecewise, so we need to undo the global
1866 reversal, convert to the complex mode and reverse again. */
1867 if (reverse && COMPLEX_MODE_P (tmode))
1869 target = flip_storage_order (int_mode, target);
1870 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1871 target = flip_storage_order (tmode, target);
1873 else
1874 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1876 return target;
1879 /* Generate code to extract a byte-field from STR_RTX
1880 containing BITSIZE bits, starting at BITNUM,
1881 and put it in TARGET if possible (if TARGET is nonzero).
1882 Regardless of TARGET, we return the rtx for where the value is placed.
1884 STR_RTX is the structure containing the byte (a REG or MEM).
1885 UNSIGNEDP is nonzero if this is an unsigned bit field.
1886 MODE is the natural mode of the field value once extracted.
1887 TMODE is the mode the caller would like the value to have;
1888 but the value may be returned with type MODE instead.
1890 If REVERSE is true, the extraction is to be done in reverse order.
1892 If a TARGET is specified and we can store in it at no extra cost,
1893 we do so, and return TARGET.
1894 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1895 if they are equally easy. */
1897 rtx
1898 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1899 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1900 machine_mode mode, machine_mode tmode, bool reverse,
1901 rtx *alt_rtl)
1903 machine_mode mode1;
1905 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1906 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1907 mode1 = GET_MODE (str_rtx);
1908 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1909 mode1 = GET_MODE (target);
1910 else
1911 mode1 = tmode;
1913 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1915 /* Extraction of a full MODE1 value can be done with a simple load.
1916 We know here that the field can be accessed with one single
1917 instruction. For targets that support unaligned memory,
1918 an unaligned access may be necessary. */
1919 if (bitsize == GET_MODE_BITSIZE (mode1))
1921 rtx result = adjust_bitfield_address (str_rtx, mode1,
1922 bitnum / BITS_PER_UNIT);
1923 if (reverse)
1924 result = flip_storage_order (mode1, result);
1925 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1926 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1929 str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1930 &bitnum);
1931 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1932 str_rtx = copy_to_reg (str_rtx);
1935 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1936 target, mode, tmode, reverse, true, alt_rtl);
1939 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1940 from bit BITNUM of OP0.
1942 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1943 If REVERSE is true, the extraction is to be done in reverse order.
1945 If TARGET is nonzero, attempts to store the value there
1946 and return TARGET, but this is not guaranteed.
1947 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
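/* As a rough illustration of the fixed-bit-field approach (endianness
   and memory-narrowing details are simplified here): extracting an
   unsigned 5-bit field whose lsb is bit 3 of a 32-bit value X amounts to
   (X >> 3) & 0x1f, i.e. a shift that moves the field to the lsb followed
   by a mask of BITSIZE ones.  */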
1949 static rtx
1950 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1951 unsigned HOST_WIDE_INT bitsize,
1952 unsigned HOST_WIDE_INT bitnum, rtx target,
1953 int unsignedp, bool reverse)
1955 if (MEM_P (op0))
1957 machine_mode mode
1958 = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1959 MEM_VOLATILE_P (op0));
1961 if (mode == VOIDmode)
1962 /* The only way this should occur is if the field spans word
1963 boundaries. */
1964 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1965 reverse);
1967 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1970 return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1971 target, unsignedp, reverse);
1974 /* Helper function for extract_fixed_bit_field, extracts
1975 the bit field always using the MODE of OP0. */
1977 static rtx
1978 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1979 unsigned HOST_WIDE_INT bitsize,
1980 unsigned HOST_WIDE_INT bitnum, rtx target,
1981 int unsignedp, bool reverse)
1983 machine_mode mode = GET_MODE (op0);
1984 gcc_assert (SCALAR_INT_MODE_P (mode));
1986 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1987 for invalid input, such as extract equivalent of f5 from
1988 gcc.dg/pr48335-2.c. */
1990 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1991 /* BITNUM is the distance between our msb and that of OP0.
1992 Convert it to the distance from the lsb. */
1993 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1995 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1996 We have reduced the big-endian case to the little-endian case. */
1997 if (reverse)
1998 op0 = flip_storage_order (mode, op0);
2000 if (unsignedp)
2002 if (bitnum)
2004 /* If the field does not already start at the lsb,
2005 shift it so it does. */
2006 /* Maybe propagate the target for the shift. */
2007 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2008 if (tmode != mode)
2009 subtarget = 0;
2010 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2012 /* Convert the value to the desired mode. */
2013 if (mode != tmode)
2014 op0 = convert_to_mode (tmode, op0, 1);
2016 /* Unless the msb of the field used to be the msb when we shifted,
2017 mask out the upper bits. */
2019 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2020 return expand_binop (GET_MODE (op0), and_optab, op0,
2021 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
2022 target, 1, OPTAB_LIB_WIDEN);
2023 return op0;
2026 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2027 then arithmetic-shift its lsb to the lsb of the word. */
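/* For instance, with a 32-bit word a signed 5-bit field whose lsb is
   bit 3 of X can be obtained (ignoring endianness details) as
   (int) (X << 24) >> 27: the left shift puts the field's msb in the
   word's msb, and the arithmetic right shift replicates it as the
   sign bit.  */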
2028 op0 = force_reg (mode, op0);
2030 /* Find the narrowest integer mode that contains the field. */
2032 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
2033 mode = GET_MODE_WIDER_MODE (mode))
2034 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
2036 op0 = convert_to_mode (mode, op0, 0);
2037 break;
2040 if (mode != tmode)
2041 target = 0;
2043 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2045 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2046 /* Maybe propagate the target for the shift. */
2047 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2048 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2051 return expand_shift (RSHIFT_EXPR, mode, op0,
2052 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2055 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2056 VALUE << BITPOS. */
2058 static rtx
2059 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2060 int bitpos)
2062 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2065 /* Extract a bit field that is split across two words
2066 and return an RTX for the result.
2068 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2069 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2070 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2072 If REVERSE is true, the extraction is to be done in reverse order. */
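/* A small example of the split case: on a 32-bit word target, a 20-bit
   field starting at bit 24 of the structure covers bits 24..31 of word 0
   and bits 0..11 of word 1.  The loop below extracts each piece as an
   unsigned field, shifts it into position and ORs it into the result,
   sign-extending at the end when UNSIGNEDP is zero.  */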
2074 static rtx
2075 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2076 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2077 bool reverse)
2079 unsigned int unit;
2080 unsigned int bitsdone = 0;
2081 rtx result = NULL_RTX;
2082 int first = 1;
2084 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2085 much at a time. */
2086 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2087 unit = BITS_PER_WORD;
2088 else
2089 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2091 while (bitsdone < bitsize)
2093 unsigned HOST_WIDE_INT thissize;
2094 rtx part, word;
2095 unsigned HOST_WIDE_INT thispos;
2096 unsigned HOST_WIDE_INT offset;
2098 offset = (bitpos + bitsdone) / unit;
2099 thispos = (bitpos + bitsdone) % unit;
2101 /* THISSIZE must not overrun a word boundary. Otherwise,
2102 extract_fixed_bit_field will call us again, and we will mutually
2103 recurse forever. */
2104 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2105 thissize = MIN (thissize, unit - thispos);
2107 /* If OP0 is a register, then handle OFFSET here. */
2108 if (SUBREG_P (op0) || REG_P (op0))
2110 word = operand_subword_force (op0, offset, GET_MODE (op0));
2111 offset = 0;
2113 else
2114 word = op0;
2116 /* Extract the parts in bit-counting order,
2117 whose meaning is determined by BYTES_PER_UNIT.
2118 OFFSET is in UNITs, and UNIT is in bits. */
2119 part = extract_fixed_bit_field (word_mode, word, thissize,
2120 offset * unit + thispos, 0, 1, reverse);
2121 bitsdone += thissize;
2123 /* Shift this part into place for the result. */
2124 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2126 if (bitsize != bitsdone)
2127 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2128 bitsize - bitsdone, 0, 1);
2130 else
2132 if (bitsdone != thissize)
2133 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2134 bitsdone - thissize, 0, 1);
2137 if (first)
2138 result = part;
2139 else
2140 /* Combine the parts with bitwise or. This works
2141 because we extracted each part as an unsigned bit field. */
2142 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2143 OPTAB_LIB_WIDEN);
2145 first = 0;
2148 /* Unsigned bit field: we are done. */
2149 if (unsignedp)
2150 return result;
2151 /* Signed bit field: sign-extend with two arithmetic shifts. */
2152 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2153 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2154 return expand_shift (RSHIFT_EXPR, word_mode, result,
2155 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2158 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2159 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2160 MODE, fill the upper bits with zeros. Fail if the layout of either
2161 mode is unknown (as for CC modes) or if the extraction would involve
2162 unprofitable mode punning. Return the value on success, otherwise
2163 return null.
2165 This is different from gen_lowpart* in these respects:
2167 - the returned value must always be considered an rvalue
2169 - when MODE is wider than SRC_MODE, the extraction involves
2170 a zero extension
2172 - when MODE is smaller than SRC_MODE, the extraction involves
2173 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2175 In other words, this routine performs a computation, whereas the
2176 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2177 operations. */
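/* For example, assuming a target where the modes involved are tieable,
   extract_low_bits (SImode, DImode, x) behaves as a truncation to the
   low 32 bits of X, while extract_low_bits (DImode, SImode, x)
   zero-extends X to 64 bits; in both cases the result is a fresh rvalue
   computation rather than a subreg lvalue.  */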
2179 rtx
2180 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2182 machine_mode int_mode, src_int_mode;
2184 if (mode == src_mode)
2185 return src;
2187 if (CONSTANT_P (src))
2189 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2190 fails, it will happily create (subreg (symbol_ref)) or similar
2191 invalid SUBREGs. */
2192 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2193 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2194 if (ret)
2195 return ret;
2197 if (GET_MODE (src) == VOIDmode
2198 || !validate_subreg (mode, src_mode, src, byte))
2199 return NULL_RTX;
2201 src = force_reg (GET_MODE (src), src);
2202 return gen_rtx_SUBREG (mode, src, byte);
2205 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2206 return NULL_RTX;
2208 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2209 && MODES_TIEABLE_P (mode, src_mode))
2211 rtx x = gen_lowpart_common (mode, src);
2212 if (x)
2213 return x;
2216 src_int_mode = int_mode_for_mode (src_mode);
2217 int_mode = int_mode_for_mode (mode);
2218 if (src_int_mode == BLKmode || int_mode == BLKmode)
2219 return NULL_RTX;
2221 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2222 return NULL_RTX;
2223 if (!MODES_TIEABLE_P (int_mode, mode))
2224 return NULL_RTX;
2226 src = gen_lowpart (src_int_mode, src);
2227 src = convert_modes (int_mode, src_int_mode, src, true);
2228 src = gen_lowpart (mode, src);
2229 return src;
2232 /* Add INC into TARGET. */
2234 void
2235 expand_inc (rtx target, rtx inc)
2237 rtx value = expand_binop (GET_MODE (target), add_optab,
2238 target, inc,
2239 target, 0, OPTAB_LIB_WIDEN);
2240 if (value != target)
2241 emit_move_insn (target, value);
2244 /* Subtract DEC from TARGET. */
2246 void
2247 expand_dec (rtx target, rtx dec)
2249 rtx value = expand_binop (GET_MODE (target), sub_optab,
2250 target, dec,
2251 target, 0, OPTAB_LIB_WIDEN);
2252 if (value != target)
2253 emit_move_insn (target, value);
2256 /* Output a shift instruction for expression code CODE,
2257 with SHIFTED being the rtx for the value to shift,
2258 and AMOUNT the rtx for the amount to shift by.
2259 Store the result in the rtx TARGET, if that is convenient.
2260 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2261 Return the rtx for where the value is.
2262 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2263 in which case 0 is returned. */
2265 static rtx
2266 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2267 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2269 rtx op1, temp = 0;
2270 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2271 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2272 optab lshift_optab = ashl_optab;
2273 optab rshift_arith_optab = ashr_optab;
2274 optab rshift_uns_optab = lshr_optab;
2275 optab lrotate_optab = rotl_optab;
2276 optab rrotate_optab = rotr_optab;
2277 machine_mode op1_mode;
2278 machine_mode scalar_mode = mode;
2279 int attempt;
2280 bool speed = optimize_insn_for_speed_p ();
2282 if (VECTOR_MODE_P (mode))
2283 scalar_mode = GET_MODE_INNER (mode);
2284 op1 = amount;
2285 op1_mode = GET_MODE (op1);
2287 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2288 shift amount is a vector, use the vector/vector shift patterns. */
2289 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2291 lshift_optab = vashl_optab;
2292 rshift_arith_optab = vashr_optab;
2293 rshift_uns_optab = vlshr_optab;
2294 lrotate_optab = vrotl_optab;
2295 rrotate_optab = vrotr_optab;
2298 /* Previously detected shift-counts computed by NEGATE_EXPR
2299 and shifted in the other direction; but that does not work
2300 on all machines. */
2302 if (SHIFT_COUNT_TRUNCATED)
2304 if (CONST_INT_P (op1)
2305 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2306 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2307 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2308 % GET_MODE_BITSIZE (scalar_mode));
2309 else if (GET_CODE (op1) == SUBREG
2310 && subreg_lowpart_p (op1)
2311 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2312 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2313 op1 = SUBREG_REG (op1);
2316 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2317 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2318 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2319 amount instead. */
2320 if (rotate
2321 && CONST_INT_P (op1)
2322 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2323 GET_MODE_BITSIZE (scalar_mode) - 1))
2325 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2326 left = !left;
2327 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2330 /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2331 Note that this is not the case for bigger values. For instance a rotation
2332 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2333 0x04030201 (bswapsi). */
2334 if (rotate
2335 && CONST_INT_P (op1)
2336 && INTVAL (op1) == BITS_PER_UNIT
2337 && GET_MODE_SIZE (scalar_mode) == 2
2338 && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2339 return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2340 unsignedp);
2342 if (op1 == const0_rtx)
2343 return shifted;
2345 /* Check whether it's cheaper to implement a left shift by a constant
2346 bit count by a sequence of additions. */
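/* E.g. X << 2 can be computed with two additions as T = X + X; T + T.
   A shift by K costs K additions this way, so this only wins when the
   target's shift-by-K is unusually expensive.  */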
2347 if (code == LSHIFT_EXPR
2348 && CONST_INT_P (op1)
2349 && INTVAL (op1) > 0
2350 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2351 && INTVAL (op1) < MAX_BITS_PER_WORD
2352 && (shift_cost (speed, mode, INTVAL (op1))
2353 > INTVAL (op1) * add_cost (speed, mode))
2354 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2356 int i;
2357 for (i = 0; i < INTVAL (op1); i++)
2359 temp = force_reg (mode, shifted);
2360 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2361 unsignedp, OPTAB_LIB_WIDEN);
2363 return shifted;
2366 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2368 enum optab_methods methods;
2370 if (attempt == 0)
2371 methods = OPTAB_DIRECT;
2372 else if (attempt == 1)
2373 methods = OPTAB_WIDEN;
2374 else
2375 methods = OPTAB_LIB_WIDEN;
2377 if (rotate)
2379 /* Widening does not work for rotation. */
2380 if (methods == OPTAB_WIDEN)
2381 continue;
2382 else if (methods == OPTAB_LIB_WIDEN)
2384 /* If we have been unable to open-code this by a rotation,
2385 do it as the IOR of two shifts. I.e., to rotate A
2386 by N bits, compute
2387 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2388 where C is the bitsize of A.
2390 It is theoretically possible that the target machine might
2391 not be able to perform either shift and hence we would
2392 be making two libcalls rather than just the one for the
2393 shift (similarly if IOR could not be done). We will allow
2394 this extremely unlikely lossage to avoid complicating the
2395 code below. */
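/* Concretely, for a 32-bit mode this computes
   (A << N) | ((unsigned) A >> ((-N) & 31)),
   e.g. a rotate left by 3 becomes (A << 3) | ((unsigned) A >> 29);
   masking the negated count keeps the right-shift amount in the range
   0..31 even when N is not a compile-time constant.  */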
2397 rtx subtarget = target == shifted ? 0 : target;
2398 rtx new_amount, other_amount;
2399 rtx temp1;
2401 new_amount = op1;
2402 if (op1 == const0_rtx)
2403 return shifted;
2404 else if (CONST_INT_P (op1))
2405 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2406 - INTVAL (op1));
2407 else
2409 other_amount
2410 = simplify_gen_unary (NEG, GET_MODE (op1),
2411 op1, GET_MODE (op1));
2412 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2413 other_amount
2414 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2415 gen_int_mode (mask, GET_MODE (op1)));
2418 shifted = force_reg (mode, shifted);
2420 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2421 mode, shifted, new_amount, 0, 1);
2422 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2423 mode, shifted, other_amount,
2424 subtarget, 1);
2425 return expand_binop (mode, ior_optab, temp, temp1, target,
2426 unsignedp, methods);
2429 temp = expand_binop (mode,
2430 left ? lrotate_optab : rrotate_optab,
2431 shifted, op1, target, unsignedp, methods);
2433 else if (unsignedp)
2434 temp = expand_binop (mode,
2435 left ? lshift_optab : rshift_uns_optab,
2436 shifted, op1, target, unsignedp, methods);
2438 /* Do arithmetic shifts.
2439 Also, if we are going to widen the operand, we can just as well
2440 use an arithmetic right-shift instead of a logical one. */
2441 if (temp == 0 && ! rotate
2442 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2444 enum optab_methods methods1 = methods;
2446 /* If trying to widen a log shift to an arithmetic shift,
2447 don't accept an arithmetic shift of the same size. */
2448 if (unsignedp)
2449 methods1 = OPTAB_MUST_WIDEN;
2451 /* Arithmetic shift */
2453 temp = expand_binop (mode,
2454 left ? lshift_optab : rshift_arith_optab,
2455 shifted, op1, target, unsignedp, methods1);
2458 /* We used to try extzv here for logical right shifts, but that was
2459 only useful for one machine, the VAX, and caused poor code
2460 generation there for lshrdi3, so the code was deleted and a
2461 define_expand for lshrsi3 was added to vax.md. */
2464 gcc_assert (temp != NULL_RTX || may_fail);
2465 return temp;
2468 /* Output a shift instruction for expression code CODE,
2469 with SHIFTED being the rtx for the value to shift,
2470 and AMOUNT the amount to shift by.
2471 Store the result in the rtx TARGET, if that is convenient.
2472 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2473 Return the rtx for where the value is. */
2475 rtx
2476 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2477 int amount, rtx target, int unsignedp)
2479 return expand_shift_1 (code, mode,
2480 shifted, GEN_INT (amount), target, unsignedp);
2483 /* Likewise, but return 0 if that cannot be done. */
2485 static rtx
2486 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2487 int amount, rtx target, int unsignedp)
2489 return expand_shift_1 (code, mode,
2490 shifted, GEN_INT (amount), target, unsignedp, true);
2493 /* Output a shift instruction for expression code CODE,
2494 with SHIFTED being the rtx for the value to shift,
2495 and AMOUNT the tree for the amount to shift by.
2496 Store the result in the rtx TARGET, if that is convenient.
2497 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2498 Return the rtx for where the value is. */
2500 rtx
2501 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2502 tree amount, rtx target, int unsignedp)
2504 return expand_shift_1 (code, mode,
2505 shifted, expand_normal (amount), target, unsignedp);
2509 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2510 const struct mult_cost *, machine_mode mode);
2511 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2512 const struct algorithm *, enum mult_variant);
2513 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2514 static rtx extract_high_half (machine_mode, rtx);
2515 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2516 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2517 int, int);
2518 /* Compute and return the best algorithm for multiplying by T.
2519 The algorithm must cost less than cost_limit.
2520 If retval.cost >= COST_LIMIT, no algorithm was found and all
2521 other fields of the returned struct are undefined.
2522 MODE is the machine mode of the multiplication. */
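/* For example, a multiplication by 10 can be synthesized as
   ((X << 2) + X) << 1, two cheap operations instead of a general
   multiply; synth_mult searches recursively for such decompositions,
   pruning with COST_LIMIT.  */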
2524 static void
2525 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2526 const struct mult_cost *cost_limit, machine_mode mode)
2528 int m;
2529 struct algorithm *alg_in, *best_alg;
2530 struct mult_cost best_cost;
2531 struct mult_cost new_limit;
2532 int op_cost, op_latency;
2533 unsigned HOST_WIDE_INT orig_t = t;
2534 unsigned HOST_WIDE_INT q;
2535 int maxm, hash_index;
2536 bool cache_hit = false;
2537 enum alg_code cache_alg = alg_zero;
2538 bool speed = optimize_insn_for_speed_p ();
2539 machine_mode imode;
2540 struct alg_hash_entry *entry_ptr;
2542 /* Indicate that no algorithm is yet found. If no algorithm
2543 is found, this value will be returned and indicate failure. */
2544 alg_out->cost.cost = cost_limit->cost + 1;
2545 alg_out->cost.latency = cost_limit->latency + 1;
2547 if (cost_limit->cost < 0
2548 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2549 return;
2551 /* Be prepared for vector modes. */
2552 imode = GET_MODE_INNER (mode);
2554 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2556 /* Restrict the bits of "t" to the multiplication's mode. */
2557 t &= GET_MODE_MASK (imode);
2559 /* t == 1 can be done in zero cost. */
2560 if (t == 1)
2562 alg_out->ops = 1;
2563 alg_out->cost.cost = 0;
2564 alg_out->cost.latency = 0;
2565 alg_out->op[0] = alg_m;
2566 return;
2569 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2570 fail now. */
2571 if (t == 0)
2573 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2574 return;
2575 else
2577 alg_out->ops = 1;
2578 alg_out->cost.cost = zero_cost (speed);
2579 alg_out->cost.latency = zero_cost (speed);
2580 alg_out->op[0] = alg_zero;
2581 return;
2585 /* We'll be needing a couple extra algorithm structures now. */
2587 alg_in = XALLOCA (struct algorithm);
2588 best_alg = XALLOCA (struct algorithm);
2589 best_cost = *cost_limit;
2591 /* Compute the hash index. */
2592 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2594 /* See if we already know what to do for T. */
2595 entry_ptr = alg_hash_entry_ptr (hash_index);
2596 if (entry_ptr->t == t
2597 && entry_ptr->mode == mode
2598 && entry_ptr->speed == speed
2599 && entry_ptr->alg != alg_unknown)
2601 cache_alg = entry_ptr->alg;
2603 if (cache_alg == alg_impossible)
2605 /* The cache tells us that it's impossible to synthesize
2606 multiplication by T within entry_ptr->cost. */
2607 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2608 /* COST_LIMIT is at least as restrictive as the one
2609 recorded in the hash table, in which case we have no
2610 hope of synthesizing a multiplication. Just
2611 return. */
2612 return;
2614 /* If we get here, COST_LIMIT is less restrictive than the
2615 one recorded in the hash table, so we may be able to
2616 synthesize a multiplication. Proceed as if we didn't
2617 have the cache entry. */
2619 else
2621 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2622 /* The cached algorithm shows that this multiplication
2623 requires more cost than COST_LIMIT. Just return. This
2624 way, we don't clobber this cache entry with
2625 alg_impossible but retain useful information. */
2626 return;
2628 cache_hit = true;
2630 switch (cache_alg)
2632 case alg_shift:
2633 goto do_alg_shift;
2635 case alg_add_t_m2:
2636 case alg_sub_t_m2:
2637 goto do_alg_addsub_t_m2;
2639 case alg_add_factor:
2640 case alg_sub_factor:
2641 goto do_alg_addsub_factor;
2643 case alg_add_t2_m:
2644 goto do_alg_add_t2_m;
2646 case alg_sub_t2_m:
2647 goto do_alg_sub_t2_m;
2649 default:
2650 gcc_unreachable ();
2655 /* If we have a group of zero bits at the low-order part of T, try
2656 multiplying by the remaining bits and then doing a shift. */
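/* E.g. T = 20 = 5 << 2: synthesize the multiplication by 5 and follow
   it with a left shift by 2.  */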
2658 if ((t & 1) == 0)
2660 do_alg_shift:
2661 m = ctz_or_zero (t); /* m = number of low zero bits */
2662 if (m < maxm)
2664 q = t >> m;
2665 /* The function expand_shift will choose between a shift and
2666 a sequence of additions, so the observed cost is given as
2667 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2668 op_cost = m * add_cost (speed, mode);
2669 if (shift_cost (speed, mode, m) < op_cost)
2670 op_cost = shift_cost (speed, mode, m);
2671 new_limit.cost = best_cost.cost - op_cost;
2672 new_limit.latency = best_cost.latency - op_cost;
2673 synth_mult (alg_in, q, &new_limit, mode);
2675 alg_in->cost.cost += op_cost;
2676 alg_in->cost.latency += op_cost;
2677 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2679 best_cost = alg_in->cost;
2680 std::swap (alg_in, best_alg);
2681 best_alg->log[best_alg->ops] = m;
2682 best_alg->op[best_alg->ops] = alg_shift;
2685 /* See if treating ORIG_T as a signed number yields a better
2686 sequence. Try this sequence only for a negative ORIG_T
2687 as it would be useless for a non-negative ORIG_T. */
2688 if ((HOST_WIDE_INT) orig_t < 0)
2690 /* Shift ORIG_T as follows because a right shift of a
2691 negative-valued signed type is implementation
2692 defined. */
2693 q = ~(~orig_t >> m);
2694 /* The function expand_shift will choose between a shift
2695 and a sequence of additions, so the observed cost is
2696 given as MIN (m * add_cost(speed, mode),
2697 shift_cost(speed, mode, m)). */
2698 op_cost = m * add_cost (speed, mode);
2699 if (shift_cost (speed, mode, m) < op_cost)
2700 op_cost = shift_cost (speed, mode, m);
2701 new_limit.cost = best_cost.cost - op_cost;
2702 new_limit.latency = best_cost.latency - op_cost;
2703 synth_mult (alg_in, q, &new_limit, mode);
2705 alg_in->cost.cost += op_cost;
2706 alg_in->cost.latency += op_cost;
2707 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2709 best_cost = alg_in->cost;
2710 std::swap (alg_in, best_alg);
2711 best_alg->log[best_alg->ops] = m;
2712 best_alg->op[best_alg->ops] = alg_shift;
2716 if (cache_hit)
2717 goto done;
2720 /* If we have an odd number, add or subtract one. */
2721 if ((t & 1) != 0)
2723 unsigned HOST_WIDE_INT w;
2725 do_alg_addsub_t_m2:
2726 for (w = 1; (w & t) != 0; w <<= 1)
2728 /* If T was -1, then W will be zero after the loop. This is another
2729 case where T ends with ...111. Handling this with (T + 1) and
2730 subtract 1 produces slightly better code and results in algorithm
2731 selection much faster than treating it like the ...0111 case
2732 below. */
2733 if (w == 0
2734 || (w > 2
2735 /* Reject the case where t is 3.
2736 Thus we prefer addition in that case. */
2737 && t != 3))
2739 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2741 op_cost = add_cost (speed, mode);
2742 new_limit.cost = best_cost.cost - op_cost;
2743 new_limit.latency = best_cost.latency - op_cost;
2744 synth_mult (alg_in, t + 1, &new_limit, mode);
2746 alg_in->cost.cost += op_cost;
2747 alg_in->cost.latency += op_cost;
2748 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2750 best_cost = alg_in->cost;
2751 std::swap (alg_in, best_alg);
2752 best_alg->log[best_alg->ops] = 0;
2753 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2756 else
2758 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2760 op_cost = add_cost (speed, mode);
2761 new_limit.cost = best_cost.cost - op_cost;
2762 new_limit.latency = best_cost.latency - op_cost;
2763 synth_mult (alg_in, t - 1, &new_limit, mode);
2765 alg_in->cost.cost += op_cost;
2766 alg_in->cost.latency += op_cost;
2767 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2769 best_cost = alg_in->cost;
2770 std::swap (alg_in, best_alg);
2771 best_alg->log[best_alg->ops] = 0;
2772 best_alg->op[best_alg->ops] = alg_add_t_m2;
2776 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2777 quickly with a - a * n for some appropriate constant n. */
2778 m = exact_log2 (-orig_t + 1);
2779 if (m >= 0 && m < maxm)
2781 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2782 /* If the target has a cheap shift-and-subtract insn use
2783 that in preference to a shift insn followed by a sub insn.
2784 Assume that the shift-and-sub is "atomic" with a latency
2785 equal to its cost, otherwise assume that on superscalar
2786 hardware the shift may be executed concurrently with the
2787 earlier steps in the algorithm. */
2788 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2790 op_cost = shiftsub1_cost (speed, mode, m);
2791 op_latency = op_cost;
2793 else
2794 op_latency = add_cost (speed, mode);
2796 new_limit.cost = best_cost.cost - op_cost;
2797 new_limit.latency = best_cost.latency - op_latency;
2798 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2799 &new_limit, mode);
2801 alg_in->cost.cost += op_cost;
2802 alg_in->cost.latency += op_latency;
2803 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2805 best_cost = alg_in->cost;
2806 std::swap (alg_in, best_alg);
2807 best_alg->log[best_alg->ops] = m;
2808 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2812 if (cache_hit)
2813 goto done;
2816 /* Look for factors of t of the form
2817 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2818 If we find such a factor, we can multiply by t using an algorithm that
2819 multiplies by q, shift the result by m and add/subtract it to itself.
2821 We search for large factors first and loop down, even if large factors
2822 are less probable than small; if we find a large factor we will find a
2823 good sequence quickly, and therefore be able to prune (by decreasing
2824 COST_LIMIT) the search. */
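/* E.g. T = 45 = 5 * (2^3 + 1): synthesize Q = X * 5 and then compute
   (Q << 3) + Q.  */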
2826 do_alg_addsub_factor:
2827 for (m = floor_log2 (t - 1); m >= 2; m--)
2829 unsigned HOST_WIDE_INT d;
2831 d = (HOST_WIDE_INT_1U << m) + 1;
2832 if (t % d == 0 && t > d && m < maxm
2833 && (!cache_hit || cache_alg == alg_add_factor))
2835 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2836 if (shiftadd_cost (speed, mode, m) <= op_cost)
2837 op_cost = shiftadd_cost (speed, mode, m);
2839 op_latency = op_cost;
2842 new_limit.cost = best_cost.cost - op_cost;
2843 new_limit.latency = best_cost.latency - op_latency;
2844 synth_mult (alg_in, t / d, &new_limit, mode);
2846 alg_in->cost.cost += op_cost;
2847 alg_in->cost.latency += op_latency;
2848 if (alg_in->cost.latency < op_cost)
2849 alg_in->cost.latency = op_cost;
2850 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2852 best_cost = alg_in->cost;
2853 std::swap (alg_in, best_alg);
2854 best_alg->log[best_alg->ops] = m;
2855 best_alg->op[best_alg->ops] = alg_add_factor;
2857 /* Other factors will have been taken care of in the recursion. */
2858 break;
2861 d = (HOST_WIDE_INT_1U << m) - 1;
2862 if (t % d == 0 && t > d && m < maxm
2863 && (!cache_hit || cache_alg == alg_sub_factor))
2865 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2866 if (shiftsub0_cost (speed, mode, m) <= op_cost)
2867 op_cost = shiftsub0_cost (speed, mode, m);
2869 op_latency = op_cost;
2871 new_limit.cost = best_cost.cost - op_cost;
2872 new_limit.latency = best_cost.latency - op_latency;
2873 synth_mult (alg_in, t / d, &new_limit, mode);
2875 alg_in->cost.cost += op_cost;
2876 alg_in->cost.latency += op_latency;
2877 if (alg_in->cost.latency < op_cost)
2878 alg_in->cost.latency = op_cost;
2879 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2881 best_cost = alg_in->cost;
2882 std::swap (alg_in, best_alg);
2883 best_alg->log[best_alg->ops] = m;
2884 best_alg->op[best_alg->ops] = alg_sub_factor;
2886 break;
2889 if (cache_hit)
2890 goto done;
2892 /* Try shift-and-add (load effective address) instructions,
2893 i.e. do a*3, a*5, a*9. */
2894 if ((t & 1) != 0)
2896 do_alg_add_t2_m:
2897 q = t - 1;
2898 m = ctz_hwi (q);
2899 if (q && m < maxm)
2901 op_cost = shiftadd_cost (speed, mode, m);
2902 new_limit.cost = best_cost.cost - op_cost;
2903 new_limit.latency = best_cost.latency - op_cost;
2904 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2906 alg_in->cost.cost += op_cost;
2907 alg_in->cost.latency += op_cost;
2908 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2910 best_cost = alg_in->cost;
2911 std::swap (alg_in, best_alg);
2912 best_alg->log[best_alg->ops] = m;
2913 best_alg->op[best_alg->ops] = alg_add_t2_m;
2916 if (cache_hit)
2917 goto done;
2919 do_alg_sub_t2_m:
2920 q = t + 1;
2921 m = ctz_hwi (q);
2922 if (q && m < maxm)
2924 op_cost = shiftsub0_cost (speed, mode, m);
2925 new_limit.cost = best_cost.cost - op_cost;
2926 new_limit.latency = best_cost.latency - op_cost;
2927 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2929 alg_in->cost.cost += op_cost;
2930 alg_in->cost.latency += op_cost;
2931 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2933 best_cost = alg_in->cost;
2934 std::swap (alg_in, best_alg);
2935 best_alg->log[best_alg->ops] = m;
2936 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2939 if (cache_hit)
2940 goto done;
2943 done:
2944 /* If best_cost has not decreased, we have not found any algorithm. */
2945 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2947 /* We failed to find an algorithm. Record alg_impossible for
2948 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2949 we are asked to find an algorithm for T within the same or
2950 lower COST_LIMIT, we can immediately return to the
2951 caller. */
2952 entry_ptr->t = t;
2953 entry_ptr->mode = mode;
2954 entry_ptr->speed = speed;
2955 entry_ptr->alg = alg_impossible;
2956 entry_ptr->cost = *cost_limit;
2957 return;
2960 /* Cache the result. */
2961 if (!cache_hit)
2963 entry_ptr->t = t;
2964 entry_ptr->mode = mode;
2965 entry_ptr->speed = speed;
2966 entry_ptr->alg = best_alg->op[best_alg->ops];
2967 entry_ptr->cost.cost = best_cost.cost;
2968 entry_ptr->cost.latency = best_cost.latency;
2971 /* If we are getting a too long sequence for `struct algorithm'
2972 to record, make this search fail. */
2973 if (best_alg->ops == MAX_BITS_PER_WORD)
2974 return;
2976 /* Copy the algorithm from temporary space to the space at alg_out.
2977 We avoid using structure assignment because the majority of
2978 best_alg is normally undefined, and this is a critical function. */
2979 alg_out->ops = best_alg->ops + 1;
2980 alg_out->cost = best_cost;
2981 memcpy (alg_out->op, best_alg->op,
2982 alg_out->ops * sizeof *alg_out->op);
2983 memcpy (alg_out->log, best_alg->log,
2984 alg_out->ops * sizeof *alg_out->log);
2987 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2988 Try three variations:
2990 - a shift/add sequence based on VAL itself
2991 - a shift/add sequence based on -VAL, followed by a negation
2992 - a shift/add sequence based on VAL - 1, followed by an addition.
2994 Return true if the cheapest of these cost less than MULT_COST,
2995 describing the algorithm in *ALG and final fixup in *VARIANT. */
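/* For example, a multiplication by -5 is typically cheapest as the
   shift/add sequence for 5, i.e. (X << 2) + X, followed by a negation;
   that corresponds to negate_variant below.  */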
2997 bool
2998 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2999 struct algorithm *alg, enum mult_variant *variant,
3000 int mult_cost)
3002 struct algorithm alg2;
3003 struct mult_cost limit;
3004 int op_cost;
3005 bool speed = optimize_insn_for_speed_p ();
3007 /* Fail quickly for impossible bounds. */
3008 if (mult_cost < 0)
3009 return false;
3011 /* Ensure that mult_cost provides a reasonable upper bound.
3012 Any constant multiplication can be performed with less
3013 than 2 * bits additions. */
3014 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3015 if (mult_cost > op_cost)
3016 mult_cost = op_cost;
3018 *variant = basic_variant;
3019 limit.cost = mult_cost;
3020 limit.latency = mult_cost;
3021 synth_mult (alg, val, &limit, mode);
3023 /* This works only if the inverted value actually fits in an
3024 `unsigned int' */
3025 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3027 op_cost = neg_cost (speed, mode);
3028 if (MULT_COST_LESS (&alg->cost, mult_cost))
3030 limit.cost = alg->cost.cost - op_cost;
3031 limit.latency = alg->cost.latency - op_cost;
3033 else
3035 limit.cost = mult_cost - op_cost;
3036 limit.latency = mult_cost - op_cost;
3039 synth_mult (&alg2, -val, &limit, mode);
3040 alg2.cost.cost += op_cost;
3041 alg2.cost.latency += op_cost;
3042 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3043 *alg = alg2, *variant = negate_variant;
3046 /* This proves very useful for division-by-constant. */
3047 op_cost = add_cost (speed, mode);
3048 if (MULT_COST_LESS (&alg->cost, mult_cost))
3050 limit.cost = alg->cost.cost - op_cost;
3051 limit.latency = alg->cost.latency - op_cost;
3053 else
3055 limit.cost = mult_cost - op_cost;
3056 limit.latency = mult_cost - op_cost;
3059 synth_mult (&alg2, val - 1, &limit, mode);
3060 alg2.cost.cost += op_cost;
3061 alg2.cost.latency += op_cost;
3062 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3063 *alg = alg2, *variant = add_variant;
3065 return MULT_COST_LESS (&alg->cost, mult_cost);
3068 /* A subroutine of expand_mult, used for constant multiplications.
3069 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3070 convenient. Use the shift/add sequence described by ALG and apply
3071 the final fixup specified by VARIANT. */
3073 static rtx
3074 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3075 rtx target, const struct algorithm *alg,
3076 enum mult_variant variant)
3078 unsigned HOST_WIDE_INT val_so_far;
3079 rtx_insn *insn;
3080 rtx accum, tem;
3081 int opno;
3082 machine_mode nmode;
3084 /* Avoid referencing memory over and over and invalid sharing
3085 on SUBREGs. */
3086 op0 = force_reg (mode, op0);
3088 /* ACCUM starts out either as OP0 or as a zero, depending on
3089 the first operation. */
3091 if (alg->op[0] == alg_zero)
3093 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3094 val_so_far = 0;
3096 else if (alg->op[0] == alg_m)
3098 accum = copy_to_mode_reg (mode, op0);
3099 val_so_far = 1;
3101 else
3102 gcc_unreachable ();
3104 for (opno = 1; opno < alg->ops; opno++)
3106 int log = alg->log[opno];
3107 rtx shift_subtarget = optimize ? 0 : accum;
3108 rtx add_target
3109 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3110 && !optimize)
3111 ? target : 0;
3112 rtx accum_target = optimize ? 0 : accum;
3113 rtx accum_inner;
3115 switch (alg->op[opno])
3117 case alg_shift:
3118 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3119 /* REG_EQUAL note will be attached to the following insn. */
3120 emit_move_insn (accum, tem);
3121 val_so_far <<= log;
3122 break;
3124 case alg_add_t_m2:
3125 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3126 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3127 add_target ? add_target : accum_target);
3128 val_so_far += HOST_WIDE_INT_1U << log;
3129 break;
3131 case alg_sub_t_m2:
3132 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3133 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3134 add_target ? add_target : accum_target);
3135 val_so_far -= HOST_WIDE_INT_1U << log;
3136 break;
3138 case alg_add_t2_m:
3139 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3140 log, shift_subtarget, 0);
3141 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3142 add_target ? add_target : accum_target);
3143 val_so_far = (val_so_far << log) + 1;
3144 break;
3146 case alg_sub_t2_m:
3147 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3148 log, shift_subtarget, 0);
3149 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3150 add_target ? add_target : accum_target);
3151 val_so_far = (val_so_far << log) - 1;
3152 break;
3154 case alg_add_factor:
3155 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3156 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3157 add_target ? add_target : accum_target);
3158 val_so_far += val_so_far << log;
3159 break;
3161 case alg_sub_factor:
3162 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3163 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3164 (add_target
3165 ? add_target : (optimize ? 0 : tem)));
3166 val_so_far = (val_so_far << log) - val_so_far;
3167 break;
3169 default:
3170 gcc_unreachable ();
3173 if (SCALAR_INT_MODE_P (mode))
3175 /* Write a REG_EQUAL note on the last insn so that we can cse
3176 multiplication sequences. Note that if ACCUM is a SUBREG,
3177 we've set the inner register and must properly indicate that. */
3178 tem = op0, nmode = mode;
3179 accum_inner = accum;
3180 if (GET_CODE (accum) == SUBREG)
3182 accum_inner = SUBREG_REG (accum);
3183 nmode = GET_MODE (accum_inner);
3184 tem = gen_lowpart (nmode, op0);
3187 insn = get_last_insn ();
3188 set_dst_reg_note (insn, REG_EQUAL,
3189 gen_rtx_MULT (nmode, tem,
3190 gen_int_mode (val_so_far, nmode)),
3191 accum_inner);
3195 if (variant == negate_variant)
3197 val_so_far = -val_so_far;
3198 accum = expand_unop (mode, neg_optab, accum, target, 0);
3200 else if (variant == add_variant)
3202 val_so_far = val_so_far + 1;
3203 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3206 /* Compare only the bits of val and val_so_far that are significant
3207 in the result mode, to avoid sign-/zero-extension confusion. */
3208 nmode = GET_MODE_INNER (mode);
3209 val &= GET_MODE_MASK (nmode);
3210 val_so_far &= GET_MODE_MASK (nmode);
3211 gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3213 return accum;
3216 /* Perform a multiplication and return an rtx for the result.
3217 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3218 TARGET is a suggestion for where to store the result (an rtx).
3220 We check specially for a constant integer as OP1.
3221 If you want this check for OP0 as well, then before calling
3222 you should swap the two operands if OP0 would be constant. */
3224 rtx
3225 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3226 int unsignedp)
3228 enum mult_variant variant;
3229 struct algorithm algorithm;
3230 rtx scalar_op1;
3231 int max_cost;
3232 bool speed = optimize_insn_for_speed_p ();
3233 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3235 if (CONSTANT_P (op0))
3236 std::swap (op0, op1);
3238 /* For vectors, there are several simplifications that can be made if
3239 all elements of the vector constant are identical. */
3240 scalar_op1 = unwrap_const_vec_duplicate (op1);
3242 if (INTEGRAL_MODE_P (mode))
3244 rtx fake_reg;
3245 HOST_WIDE_INT coeff;
3246 bool is_neg;
3247 int mode_bitsize;
3249 if (op1 == CONST0_RTX (mode))
3250 return op1;
3251 if (op1 == CONST1_RTX (mode))
3252 return op0;
3253 if (op1 == CONSTM1_RTX (mode))
3254 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3255 op0, target, 0);
3257 if (do_trapv)
3258 goto skip_synth;
3260 /* If mode is integer vector mode, check if the backend supports
3261 vector lshift (by scalar or vector) at all. If not, we can't use
3262 synthesized multiply. */
3263 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3264 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3265 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3266 goto skip_synth;
3268 /* These are the operations that are potentially turned into
3269 a sequence of shifts and additions. */
3270 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3272 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3273 less than or equal in size to `unsigned int' this doesn't matter.
3274 If the mode is larger than `unsigned int', then synth_mult works
3275 only if the constant value exactly fits in an `unsigned int' without
3276 any truncation. This means that multiplying by negative values does
3277 not work; results are off by 2^32 on a 32 bit machine. */
3278 if (CONST_INT_P (scalar_op1))
3280 coeff = INTVAL (scalar_op1);
3281 is_neg = coeff < 0;
3283 #if TARGET_SUPPORTS_WIDE_INT
3284 else if (CONST_WIDE_INT_P (scalar_op1))
3285 #else
3286 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3287 #endif
3289 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3290 /* Perfect power of 2 (other than 1, which is handled above). */
3291 if (shift > 0)
3292 return expand_shift (LSHIFT_EXPR, mode, op0,
3293 shift, target, unsignedp);
3294 else
3295 goto skip_synth;
3297 else
3298 goto skip_synth;
3300 /* We used to test optimize here, on the grounds that it's better to
3301 produce a smaller program when -O is not used. But this causes
3302 such a terrible slowdown sometimes that it seems better to always
3303 use synth_mult. */
3305 /* Special case powers of two. */
3306 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3307 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3308 return expand_shift (LSHIFT_EXPR, mode, op0,
3309 floor_log2 (coeff), target, unsignedp);
3311 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3313 /* Attempt to handle multiplication of DImode values by negative
3314 coefficients, by performing the multiplication by a positive
3315 multiplier and then inverting the result. */
3316 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3318 /* It's safe to use -coeff even for INT_MIN, as the
3319 result is interpreted as an unsigned coefficient.
3320 Exclude cost of op0 from max_cost to match the cost
3321 calculation of the synth_mult. */
3322 coeff = -(unsigned HOST_WIDE_INT) coeff;
3323 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3324 mode, speed)
3325 - neg_cost (speed, mode));
3326 if (max_cost <= 0)
3327 goto skip_synth;
3329 /* Special case powers of two. */
3330 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3332 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3333 floor_log2 (coeff), target, unsignedp);
3334 return expand_unop (mode, neg_optab, temp, target, 0);
3337 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3338 max_cost))
3340 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3341 &algorithm, variant);
3342 return expand_unop (mode, neg_optab, temp, target, 0);
3344 goto skip_synth;
3347 /* Exclude cost of op0 from max_cost to match the cost
3348 calculation of the synth_mult. */
3349 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3350 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3351 return expand_mult_const (mode, op0, coeff, target,
3352 &algorithm, variant);
3354 skip_synth:
3356 /* Expand x*2.0 as x+x. */
3357 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3358 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3360 op0 = force_reg (GET_MODE (op0), op0);
3361 return expand_binop (mode, add_optab, op0, op0,
3362 target, unsignedp, OPTAB_LIB_WIDEN);
3365 /* This used to use umul_optab if unsigned, but for non-widening multiply
3366 there is no difference between signed and unsigned. */
3367 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3368 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3369 gcc_assert (op0);
3370 return op0;
3373 /* Return a cost estimate for multiplying a register by the given
3374 COEFFicient in the given MODE and SPEED. */
3376 int
3377 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3379 int max_cost;
3380 struct algorithm algorithm;
3381 enum mult_variant variant;
3383 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3384 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3385 mode, speed);
3386 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3387 return algorithm.cost.cost;
3388 else
3389 return max_cost;
3392 /* Perform a widening multiplication and return an rtx for the result.
3393 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3394 TARGET is a suggestion for where to store the result (an rtx).
3395 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3396 or smul_widen_optab.
3398 We check specially for a constant integer as OP1, comparing the
3399 cost of a widening multiply against the cost of a sequence of shifts
3400 and adds. */
3402 rtx
3403 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3404 int unsignedp, optab this_optab)
3406 bool speed = optimize_insn_for_speed_p ();
3407 rtx cop1;
3409 if (CONST_INT_P (op1)
3410 && GET_MODE (op0) != VOIDmode
3411 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3412 this_optab == umul_widen_optab))
3413 && CONST_INT_P (cop1)
3414 && (INTVAL (cop1) >= 0
3415 || HWI_COMPUTABLE_MODE_P (mode)))
3417 HOST_WIDE_INT coeff = INTVAL (cop1);
3418 int max_cost;
3419 enum mult_variant variant;
3420 struct algorithm algorithm;
3422 if (coeff == 0)
3423 return CONST0_RTX (mode);
3425 /* Special case powers of two. */
3426 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3428 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3429 return expand_shift (LSHIFT_EXPR, mode, op0,
3430 floor_log2 (coeff), target, unsignedp);
3433 /* Exclude cost of op0 from max_cost to match the cost
3434 calculation of the synth_mult. */
3435 max_cost = mul_widen_cost (speed, mode);
3436 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3437 max_cost))
3439 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3440 return expand_mult_const (mode, op0, coeff, target,
3441 &algorithm, variant);
3444 return expand_binop (mode, this_optab, op0, op1, target,
3445 unsignedp, OPTAB_LIB_WIDEN);
3448 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3449 replace division by D, and put the least significant N bits of the result
3450 in *MULTIPLIER_PTR and return the most significant bit.
3452 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3453 needed precision is in PRECISION (should be <= N).
3455 PRECISION should be as small as possible so this function can choose
3456 multiplier more freely.
3458 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3459 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3461 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3462 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
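/* A classic instance, with N = PRECISION = 32 and D = 3: the formulas
   above yield the multiplier 0xAAAAAAAB with *POST_SHIFT_PTR = 1 and a
   return value of 0, so an unsigned X / 3 becomes the high 32 bits of
   the 64-bit product X * 0xAAAAAAAB, shifted right by 1.  */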
3464 unsigned HOST_WIDE_INT
3465 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3466 unsigned HOST_WIDE_INT *multiplier_ptr,
3467 int *post_shift_ptr, int *lgup_ptr)
3469 int lgup, post_shift;
3470 int pow, pow2;
3472 /* lgup = ceil(log2(divisor)); */
3473 lgup = ceil_log2 (d);
3475 gcc_assert (lgup <= n);
3477 pow = n + lgup;
3478 pow2 = n + lgup - precision;
3480 /* mlow = 2^(N + lgup)/d */
3481 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3482 wide_int mlow = wi::udiv_trunc (val, d);
3484 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3485 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3486 wide_int mhigh = wi::udiv_trunc (val, d);
3488 /* If precision == N, then mlow, mhigh exceed 2^N
3489 (but they do not exceed 2^(N+1)). */
3491 /* Reduce to lowest terms. */
3492 for (post_shift = lgup; post_shift > 0; post_shift--)
3494 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3495 HOST_BITS_PER_WIDE_INT);
3496 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3497 HOST_BITS_PER_WIDE_INT);
3498 if (ml_lo >= mh_lo)
3499 break;
3501 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3502 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3505 *post_shift_ptr = post_shift;
3506 *lgup_ptr = lgup;
3507 if (n < HOST_BITS_PER_WIDE_INT)
3509 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3510 *multiplier_ptr = mhigh.to_uhwi () & mask;
3511 return mhigh.to_uhwi () >= mask;
3513 else
3515 *multiplier_ptr = mhigh.to_uhwi ();
3516 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
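/* Worked example (illustrative, not part of the original source): for
   N == 32, PRECISION == 32 and D == 5, lgup == 3, mhigh reduces to
   0xCCCCCCCD with *POST_SHIFT_PTR == 2, and the function returns 0
   because the multiplier fits in N bits.  An unsigned 32-bit division
   by 5 can therefore be expanded as a multiply-high plus a shift:

     #include <stdint.h>

     uint32_t udiv5 (uint32_t x)
     {
       // high half of the 32x32->64 product, then the post shift
       return (uint32_t) (((uint64_t) x * 0xCCCCCCCDu) >> 32) >> 2;
     }

   When the returned most significant bit is nonzero (e.g. D == 7, where
   the multiplier is 0x24924925), the caller instead uses the
   add-and-shift fix-up sequence emitted in expand_divmod.  */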
3520 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3521 congruent to 1 (mod 2**N). */
3523 static unsigned HOST_WIDE_INT
3524 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3526 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3528 /* The algorithm notes that the choice y = x satisfies
3529 x*y == 1 mod 2^3, since x is assumed odd.
3530 Each iteration doubles the number of bits of significance in y. */
3532 unsigned HOST_WIDE_INT mask;
3533 unsigned HOST_WIDE_INT y = x;
3534 int nbit = 3;
3536 mask = (n == HOST_BITS_PER_WIDE_INT
3537 ? HOST_WIDE_INT_M1U
3538 : (HOST_WIDE_INT_1U << n) - 1);
3540 while (nbit < n)
3542 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3543 nbit *= 2;
3545 return y;
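/* Illustrative sketch (not part of the original source): the same
   Newton-Raphson iteration written out for a fixed 32-bit word.  If
   x*y == 1 (mod 2^k) then x*(y*(2 - x*y)) == 1 (mod 2^(2k)), so the
   number of correct low-order bits doubles each step and four steps
   reach 32 bits from the initial 3:

     #include <stdint.h>

     uint32_t invert_mod2_32 (uint32_t x)   // x must be odd
     {
       uint32_t y = x;          // correct mod 2^3 for any odd x
       y = y * (2 - x * y);     // correct mod 2^6
       y = y * (2 - x * y);     // correct mod 2^12
       y = y * (2 - x * y);     // correct mod 2^24
       y = y * (2 - x * y);     // correct mod 2^48, hence mod 2^32
       return y;
     }

   e.g. invert_mod2_32 (7) == 0xB6DB6DB7, and 7 * 0xB6DB6DB7 == 1 (mod 2^32).  */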
3548 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3549 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3550 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3551 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3552 become signed.
3554 The result is put in TARGET if that is convenient.
3556 MODE is the mode of operation. */
3559 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3560 rtx op1, rtx target, int unsignedp)
3562 rtx tem;
3563 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3565 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3566 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3567 tem = expand_and (mode, tem, op1, NULL_RTX);
3568 adj_operand
3569 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3570 adj_operand);
3572 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3573 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3574 tem = expand_and (mode, tem, op0, NULL_RTX);
3575 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3576 target);
3578 return target;
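/* Illustrative identity (not part of the original source): writing
   UMULH and SMULH for the unsigned and signed high halves of an N-bit
   product, the adjustment above implements, modulo 2^N,

     UMULH (a, b) = SMULH (a, b) + (a < 0 ? b : 0) + (b < 0 ? a : 0)

   and the reverse direction with the additions replaced by
   subtractions.  A minimal 32-bit sketch of the unsigned-from-signed
   direction:

     #include <stdint.h>

     uint32_t umulh_from_smulh (int32_t a, int32_t b, uint32_t smulh)
     {
       smulh += (a < 0 ? (uint32_t) b : 0);
       smulh += (b < 0 ? (uint32_t) a : 0);
       return smulh;
     }
*/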
3581 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3583 static rtx
3584 extract_high_half (machine_mode mode, rtx op)
3586 machine_mode wider_mode;
3588 if (mode == word_mode)
3589 return gen_highpart (mode, op);
3591 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3593 wider_mode = GET_MODE_WIDER_MODE (mode);
3594 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3595 GET_MODE_BITSIZE (mode), 0, 1);
3596 return convert_modes (mode, wider_mode, op, 0);
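/* Illustrative sketch (not part of the original source): the
   widening-multiply-then-extract-high-half strategy used by the callers
   below corresponds, for a 32-bit mode with a 64-bit wider mode, to:

     #include <stdint.h>

     uint32_t mul_highpart_u32 (uint32_t a, uint32_t b)
     {
       return (uint32_t) (((uint64_t) a * b) >> 32);
     }
*/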
3599 /* Like expmed_mult_highpart, but only consider using a multiplication
3600 optab. OP1 is an rtx for the constant operand. */
3602 static rtx
3603 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3604 rtx target, int unsignedp, int max_cost)
3606 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3607 machine_mode wider_mode;
3608 optab moptab;
3609 rtx tem;
3610 int size;
3611 bool speed = optimize_insn_for_speed_p ();
3613 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3615 wider_mode = GET_MODE_WIDER_MODE (mode);
3616 size = GET_MODE_BITSIZE (mode);
3618 /* Firstly, try using a multiplication insn that only generates the needed
3619 high part of the product, and in the sign flavor of unsignedp. */
3620 if (mul_highpart_cost (speed, mode) < max_cost)
3622 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3623 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3624 unsignedp, OPTAB_DIRECT);
3625 if (tem)
3626 return tem;
3629 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3630 Need to adjust the result after the multiplication. */
3631 if (size - 1 < BITS_PER_WORD
3632 && (mul_highpart_cost (speed, mode)
3633 + 2 * shift_cost (speed, mode, size-1)
3634 + 4 * add_cost (speed, mode) < max_cost))
3636 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3637 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3638 unsignedp, OPTAB_DIRECT);
3639 if (tem)
3640 /* We used the wrong signedness. Adjust the result. */
3641 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3642 tem, unsignedp);
3645 /* Try widening multiplication. */
3646 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3647 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3648 && mul_widen_cost (speed, wider_mode) < max_cost)
3650 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3651 unsignedp, OPTAB_WIDEN);
3652 if (tem)
3653 return extract_high_half (mode, tem);
3656 /* Try widening the mode and performing a non-widening multiplication. */
3657 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3658 && size - 1 < BITS_PER_WORD
3659 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3660 < max_cost))
3662 rtx_insn *insns;
3663 rtx wop0, wop1;
3665 /* We need to widen the operands, for example to ensure the
3666 constant multiplier is correctly sign or zero extended.
3667 Use a sequence to clean up any instructions emitted by
3668 the conversions if things don't work out. */
3669 start_sequence ();
3670 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3671 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3672 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3673 unsignedp, OPTAB_WIDEN);
3674 insns = get_insns ();
3675 end_sequence ();
3677 if (tem)
3679 emit_insn (insns);
3680 return extract_high_half (mode, tem);
3684 /* Try widening multiplication of opposite signedness, and adjust. */
3685 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3686 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3687 && size - 1 < BITS_PER_WORD
3688 && (mul_widen_cost (speed, wider_mode)
3689 + 2 * shift_cost (speed, mode, size-1)
3690 + 4 * add_cost (speed, mode) < max_cost))
3692 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3693 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3694 if (tem != 0)
3696 tem = extract_high_half (mode, tem);
3697 /* We used the wrong signedness. Adjust the result. */
3698 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3699 target, unsignedp);
3703 return 0;
3706 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3707 putting the high half of the result in TARGET if that is convenient,
3708 and return where the result is. If the operation cannot be performed,
3709 0 is returned.
3711 MODE is the mode of operation and result.
3713 UNSIGNEDP nonzero means unsigned multiply.
3715 MAX_COST is the total allowed cost for the expanded RTL. */
3717 static rtx
3718 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3719 rtx target, int unsignedp, int max_cost)
3721 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3722 unsigned HOST_WIDE_INT cnst1;
3723 int extra_cost;
3724 bool sign_adjust = false;
3725 enum mult_variant variant;
3726 struct algorithm alg;
3727 rtx tem;
3728 bool speed = optimize_insn_for_speed_p ();
3730 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3731 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3732 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3734 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3736 /* We can't optimize modes wider than BITS_PER_WORD.
3737 ??? We might be able to perform double-word arithmetic if
3738 mode == word_mode, however all the cost calculations in
3739 synth_mult etc. assume single-word operations. */
3740 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3741 return expmed_mult_highpart_optab (mode, op0, op1, target,
3742 unsignedp, max_cost);
3744 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3746 /* Check whether we try to multiply by a negative constant. */
3747 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3749 sign_adjust = true;
3750 extra_cost += add_cost (speed, mode);
3753 /* See whether shift/add multiplication is cheap enough. */
3754 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3755 max_cost - extra_cost))
3757 /* See whether the specialized multiplication optabs are
3758 cheaper than the shift/add version. */
3759 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3760 alg.cost.cost + extra_cost);
3761 if (tem)
3762 return tem;
3764 tem = convert_to_mode (wider_mode, op0, unsignedp);
3765 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3766 tem = extract_high_half (mode, tem);
3768 /* Adjust result for signedness. */
3769 if (sign_adjust)
3770 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3772 return tem;
3774 return expmed_mult_highpart_optab (mode, op0, op1, target,
3775 unsignedp, max_cost);
3779 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3781 static rtx
3782 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3784 rtx result, temp, shift;
3785 rtx_code_label *label;
3786 int logd;
3787 int prec = GET_MODE_PRECISION (mode);
3789 logd = floor_log2 (d);
3790 result = gen_reg_rtx (mode);
3792 /* Avoid conditional branches when they're expensive. */
3793 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3794 && optimize_insn_for_speed_p ())
3796 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3797 mode, 0, -1);
3798 if (signmask)
3800 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3801 signmask = force_reg (mode, signmask);
3802 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3804 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3805 which instruction sequence to use. If logical right shifts
3806 are expensive then use 2 XORs, 2 SUBs and an AND; otherwise
3807 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3809 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3810 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3811 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3812 > COSTS_N_INSNS (2)))
3814 temp = expand_binop (mode, xor_optab, op0, signmask,
3815 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3816 temp = expand_binop (mode, sub_optab, temp, signmask,
3817 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3818 temp = expand_binop (mode, and_optab, temp,
3819 gen_int_mode (masklow, mode),
3820 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3821 temp = expand_binop (mode, xor_optab, temp, signmask,
3822 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3823 temp = expand_binop (mode, sub_optab, temp, signmask,
3824 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3826 else
3828 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3829 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3830 signmask = force_reg (mode, signmask);
3832 temp = expand_binop (mode, add_optab, op0, signmask,
3833 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3834 temp = expand_binop (mode, and_optab, temp,
3835 gen_int_mode (masklow, mode),
3836 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3837 temp = expand_binop (mode, sub_optab, temp, signmask,
3838 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3840 return temp;
3844 /* Mask contains the mode's signbit and the significant bits of the
3845 modulus. By including the signbit in the operation, many targets
3846 can avoid an explicit compare operation in the following comparison
3847 against zero. */
3848 wide_int mask = wi::mask (logd, false, prec);
3849 mask = wi::set_bit (mask, prec - 1);
3851 temp = expand_binop (mode, and_optab, op0,
3852 immed_wide_int_const (mask, mode),
3853 result, 1, OPTAB_LIB_WIDEN);
3854 if (temp != result)
3855 emit_move_insn (result, temp);
3857 label = gen_label_rtx ();
3858 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3860 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3861 0, OPTAB_LIB_WIDEN);
3863 mask = wi::mask (logd, true, prec);
3864 temp = expand_binop (mode, ior_optab, temp,
3865 immed_wide_int_const (mask, mode),
3866 result, 1, OPTAB_LIB_WIDEN);
3867 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3868 0, OPTAB_LIB_WIDEN);
3869 if (temp != result)
3870 emit_move_insn (result, temp);
3871 emit_label (label);
3872 return result;
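/* Illustrative sketch (not part of the original source): the branch-free
   path above, specialised to a 32-bit operand and D == 8 (logd == 3) and
   using the cheap-LSHIFTRT variant:

     #include <stdint.h>

     int32_t smod8 (int32_t x)
     {
       int32_t signmask = x < 0 ? -1 : 0;                // emit_store_flag result
       uint32_t bias = (uint32_t) signmask >> (32 - 3);  // 0 or 7
       return (int32_t) ((((uint32_t) x + bias) & 7u) - bias);
     }

   e.g. smod8 (-9) == -1 and smod8 (5) == 5, matching C's truncating
   remainder.  */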
3875 /* Expand signed division of OP0 by a power of two D in mode MODE.
3876 This routine is only called for positive values of D. */
3878 static rtx
3879 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3881 rtx temp;
3882 rtx_code_label *label;
3883 int logd;
3885 logd = floor_log2 (d);
3887 if (d == 2
3888 && BRANCH_COST (optimize_insn_for_speed_p (),
3889 false) >= 1)
3891 temp = gen_reg_rtx (mode);
3892 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3893 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3894 0, OPTAB_LIB_WIDEN);
3895 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3898 if (HAVE_conditional_move
3899 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3901 rtx temp2;
3903 start_sequence ();
3904 temp2 = copy_to_mode_reg (mode, op0);
3905 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3906 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3907 temp = force_reg (mode, temp);
3909 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3910 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3911 mode, temp, temp2, mode, 0);
3912 if (temp2)
3914 rtx_insn *seq = get_insns ();
3915 end_sequence ();
3916 emit_insn (seq);
3917 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3919 end_sequence ();
3922 if (BRANCH_COST (optimize_insn_for_speed_p (),
3923 false) >= 2)
3925 int ushift = GET_MODE_BITSIZE (mode) - logd;
3927 temp = gen_reg_rtx (mode);
3928 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3929 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3930 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3931 > COSTS_N_INSNS (1))
3932 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3933 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3934 else
3935 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3936 ushift, NULL_RTX, 1);
3937 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3938 0, OPTAB_LIB_WIDEN);
3939 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3942 label = gen_label_rtx ();
3943 temp = copy_to_mode_reg (mode, op0);
3944 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3945 expand_inc (temp, gen_int_mode (d - 1, mode));
3946 emit_label (label);
3947 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
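/* Illustrative sketch (not part of the original source): the
   store-flag-based path above, for a 32-bit operand and D == 8
   (logd == 3), amounts to biasing negative operands by D - 1 before the
   arithmetic shift so that the shift truncates towards zero:

     #include <stdint.h>

     int32_t sdiv8 (int32_t x)
     {
       int32_t neg = x < 0 ? -1 : 0;                 // emit_store_flag result
       uint32_t bias = (uint32_t) neg >> (32 - 3);   // 0 or 7, i.e. neg & 7
       return (int32_t) ((uint32_t) x + bias) >> 3;  // assumes arithmetic >>
     }

   e.g. sdiv8 (-9) == -1 and sdiv8 (9) == 1, matching truncating
   division.  */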
3950 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3951 if that is convenient, and returning where the result is.
3952 You may request either the quotient or the remainder as the result;
3953 specify REM_FLAG nonzero to get the remainder.
3955 CODE is the expression code for which kind of division this is;
3956 it controls how rounding is done. MODE is the machine mode to use.
3957 UNSIGNEDP nonzero means do unsigned division. */
3959 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3960 and then correct it by or'ing in missing high bits
3961 if result of ANDI is nonzero.
3962 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3963 This could optimize to a bfexts instruction.
3964 But C doesn't use these operations, so their optimizations are
3965 left for later. */
3966 /* ??? For modulo, we don't actually need the highpart of the first product,
3967 the low part will do nicely. And for small divisors, the second multiply
3968 can also be a low-part only multiply or even be completely left out.
3969 E.g. to calculate the remainder of a division by 3 with a 32 bit
3970 multiply, multiply with 0x55555556 and extract the upper two bits;
3971 the result is exact for inputs up to 0x1fffffff.
3972 The input range can be reduced by using cross-sum rules.
3973 For odd divisors >= 3, the following table gives right shift counts
3974 so that if a number is shifted by an integer multiple of the given
3975 amount, the remainder stays the same:
3976 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3977 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3978 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3979 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3980 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3982 Cross-sum rules for even numbers can be derived by leaving as many bits
3983 to the right alone as the divisor has zeros to the right.
3984 E.g. if x is an unsigned 32 bit number:
3985 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
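/* Illustrative sketch (not part of the original source): the
   remainder-by-3 trick mentioned above, written out for 32-bit
   operands.  0x55555556 == (2^32 + 2)/3, so the top two bits of the low
   word of the product hold the remainder:

     #include <stdint.h>

     uint32_t rem3 (uint32_t x)    // exact for x <= 0x1fffffff
     {
       return (x * 0x55555556u) >> 30;
     }
*/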
3989 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3990 rtx op0, rtx op1, rtx target, int unsignedp)
3992 machine_mode compute_mode;
3993 rtx tquotient;
3994 rtx quotient = 0, remainder = 0;
3995 rtx_insn *last;
3996 int size;
3997 rtx_insn *insn;
3998 optab optab1, optab2;
3999 int op1_is_constant, op1_is_pow2 = 0;
4000 int max_cost, extra_cost;
4001 static HOST_WIDE_INT last_div_const = 0;
4002 bool speed = optimize_insn_for_speed_p ();
4004 op1_is_constant = CONST_INT_P (op1);
4005 if (op1_is_constant)
4007 wide_int ext_op1 = rtx_mode_t (op1, mode);
4008 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4009 || (! unsignedp
4010 && wi::popcount (wi::neg (ext_op1)) == 1));
4014 This is the structure of expand_divmod:
4016 First comes code to fix up the operands so we can perform the operations
4017 correctly and efficiently.
4019 Second comes a switch statement with code specific for each rounding mode.
4020 For some special operands this code emits all RTL for the desired
4021 operation; for other cases, it generates only a quotient and stores it in
4022 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4023 to indicate that it has not done anything.
4025 Last comes code that finishes the operation. If QUOTIENT is set and
4026 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4027 QUOTIENT is not set, it is computed using trunc rounding.
4029 We try to generate special code for division and remainder when OP1 is a
4030 constant. If |OP1| = 2**n we can use shifts and some other fast
4031 operations. For other values of OP1, we compute a carefully selected
4032 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4033 by m.
4035 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4036 half of the product. Different strategies for generating the product are
4037 implemented in expmed_mult_highpart.
4039 If what we actually want is the remainder, we generate that by another
4040 by-constant multiplication and a subtraction. */
4042 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4043 code below will malfunction if we are, so check here and handle
4044 the special case if so. */
4045 if (op1 == const1_rtx)
4046 return rem_flag ? const0_rtx : op0;
4048 /* When dividing by -1, we could get an overflow.
4049 negv_optab can handle overflows. */
4050 if (! unsignedp && op1 == constm1_rtx)
4052 if (rem_flag)
4053 return const0_rtx;
4054 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4055 ? negv_optab : neg_optab, op0, target, 0);
4058 if (target
4059 /* Don't use the function value register as a target
4060 since we have to read it as well as write it,
4061 and function-inlining gets confused by this. */
4062 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4063 /* Don't clobber an operand while doing a multi-step calculation. */
4064 || ((rem_flag || op1_is_constant)
4065 && (reg_mentioned_p (target, op0)
4066 || (MEM_P (op0) && MEM_P (target))))
4067 || reg_mentioned_p (target, op1)
4068 || (MEM_P (op1) && MEM_P (target))))
4069 target = 0;
4071 /* Get the mode in which to perform this computation. Normally it will
4072 be MODE, but sometimes we can't do the desired operation in MODE.
4073 If so, pick a wider mode in which we can do the operation. Convert
4074 to that mode at the start to avoid repeated conversions.
4076 First see what operations we need. These depend on the expression
4077 we are evaluating. (We assume that divxx3 insns exist under the
4078 same conditions as modxx3 insns, and that these insns don't normally
4079 fail. If these assumptions are not correct, we may generate less
4080 efficient code in some cases.)
4082 Then see if we find a mode in which we can open-code that operation
4083 (either a division, modulus, or shift). Finally, check for the smallest
4084 mode for which we can do the operation with a library call. */
4086 /* We might want to refine this now that we have division-by-constant
4087 optimization. Since expmed_mult_highpart tries so many variants, it is
4088 not straightforward to generalize this. Maybe we should make an array
4089 of possible modes in init_expmed? Save this for GCC 2.7. */
4091 optab1 = (op1_is_pow2
4092 ? (unsignedp ? lshr_optab : ashr_optab)
4093 : (unsignedp ? udiv_optab : sdiv_optab));
4094 optab2 = (op1_is_pow2 ? optab1
4095 : (unsignedp ? udivmod_optab : sdivmod_optab));
4097 for (compute_mode = mode; compute_mode != VOIDmode;
4098 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4099 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4100 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4101 break;
4103 if (compute_mode == VOIDmode)
4104 for (compute_mode = mode; compute_mode != VOIDmode;
4105 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4106 if (optab_libfunc (optab1, compute_mode)
4107 || optab_libfunc (optab2, compute_mode))
4108 break;
4110 /* If we still couldn't find a mode, use MODE, but expand_binop will
4111 probably die. */
4112 if (compute_mode == VOIDmode)
4113 compute_mode = mode;
4115 if (target && GET_MODE (target) == compute_mode)
4116 tquotient = target;
4117 else
4118 tquotient = gen_reg_rtx (compute_mode);
4120 size = GET_MODE_BITSIZE (compute_mode);
4121 #if 0
4122 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4123 (mode), and thereby get better code when OP1 is a constant. Do that
4124 later. It will require going over all usages of SIZE below. */
4125 size = GET_MODE_BITSIZE (mode);
4126 #endif
4128 /* Only deduct something for a REM if the last divide done was
4129 for a different constant. Then set the constant of the last
4130 divide. */
4131 max_cost = (unsignedp
4132 ? udiv_cost (speed, compute_mode)
4133 : sdiv_cost (speed, compute_mode));
4134 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4135 && INTVAL (op1) == last_div_const))
4136 max_cost -= (mul_cost (speed, compute_mode)
4137 + add_cost (speed, compute_mode));
4139 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4141 /* Now convert to the best mode to use. */
4142 if (compute_mode != mode)
4144 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4145 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4147 /* convert_modes may have placed op1 into a register, so we
4148 must recompute the following. */
4149 op1_is_constant = CONST_INT_P (op1);
4150 if (op1_is_constant)
4152 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4153 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4154 || (! unsignedp
4155 && wi::popcount (wi::neg (ext_op1)) == 1));
4157 else
4158 op1_is_pow2 = 0;
4161 /* If one of the operands is a volatile MEM, copy it into a register. */
4163 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4164 op0 = force_reg (compute_mode, op0);
4165 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4166 op1 = force_reg (compute_mode, op1);
4168 /* If we need the remainder or if OP1 is constant, we need to
4169 put OP0 in a register in case it has any queued subexpressions. */
4170 if (rem_flag || op1_is_constant)
4171 op0 = force_reg (compute_mode, op0);
4173 last = get_last_insn ();
4175 /* Promote floor rounding to trunc rounding for unsigned operations. */
4176 if (unsignedp)
4178 if (code == FLOOR_DIV_EXPR)
4179 code = TRUNC_DIV_EXPR;
4180 if (code == FLOOR_MOD_EXPR)
4181 code = TRUNC_MOD_EXPR;
4182 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4183 code = TRUNC_DIV_EXPR;
4186 if (op1 != const0_rtx)
4187 switch (code)
4189 case TRUNC_MOD_EXPR:
4190 case TRUNC_DIV_EXPR:
4191 if (op1_is_constant)
4193 if (unsignedp)
4195 unsigned HOST_WIDE_INT mh, ml;
4196 int pre_shift, post_shift;
4197 int dummy;
4198 wide_int wd = rtx_mode_t (op1, compute_mode);
4199 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4201 if (wi::popcount (wd) == 1)
4203 pre_shift = floor_log2 (d);
4204 if (rem_flag)
4206 unsigned HOST_WIDE_INT mask
4207 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4208 remainder
4209 = expand_binop (compute_mode, and_optab, op0,
4210 gen_int_mode (mask, compute_mode),
4211 remainder, 1,
4212 OPTAB_LIB_WIDEN);
4213 if (remainder)
4214 return gen_lowpart (mode, remainder);
4216 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4217 pre_shift, tquotient, 1);
4219 else if (size <= HOST_BITS_PER_WIDE_INT)
4221 if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4223 /* Most significant bit of divisor is set; emit an scc
4224 insn. */
4225 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4226 compute_mode, 1, 1);
4228 else
4230 /* Find a suitable multiplier and right shift count
4231 instead of multiplying with D. */
4233 mh = choose_multiplier (d, size, size,
4234 &ml, &post_shift, &dummy);
4236 /* If the suggested multiplier is more than SIZE bits,
4237 we can do better for even divisors, using an
4238 initial right shift. */
4239 if (mh != 0 && (d & 1) == 0)
4241 pre_shift = ctz_or_zero (d);
4242 mh = choose_multiplier (d >> pre_shift, size,
4243 size - pre_shift,
4244 &ml, &post_shift, &dummy);
4245 gcc_assert (!mh);
4247 else
4248 pre_shift = 0;
4250 if (mh != 0)
4252 rtx t1, t2, t3, t4;
4254 if (post_shift - 1 >= BITS_PER_WORD)
4255 goto fail1;
4257 extra_cost
4258 = (shift_cost (speed, compute_mode, post_shift - 1)
4259 + shift_cost (speed, compute_mode, 1)
4260 + 2 * add_cost (speed, compute_mode));
4261 t1 = expmed_mult_highpart
4262 (compute_mode, op0,
4263 gen_int_mode (ml, compute_mode),
4264 NULL_RTX, 1, max_cost - extra_cost);
4265 if (t1 == 0)
4266 goto fail1;
4267 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4268 op0, t1),
4269 NULL_RTX);
4270 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4271 t2, 1, NULL_RTX, 1);
4272 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4273 t1, t3),
4274 NULL_RTX);
4275 quotient = expand_shift
4276 (RSHIFT_EXPR, compute_mode, t4,
4277 post_shift - 1, tquotient, 1);
4279 else
4281 rtx t1, t2;
4283 if (pre_shift >= BITS_PER_WORD
4284 || post_shift >= BITS_PER_WORD)
4285 goto fail1;
4287 t1 = expand_shift
4288 (RSHIFT_EXPR, compute_mode, op0,
4289 pre_shift, NULL_RTX, 1);
4290 extra_cost
4291 = (shift_cost (speed, compute_mode, pre_shift)
4292 + shift_cost (speed, compute_mode, post_shift));
4293 t2 = expmed_mult_highpart
4294 (compute_mode, t1,
4295 gen_int_mode (ml, compute_mode),
4296 NULL_RTX, 1, max_cost - extra_cost);
4297 if (t2 == 0)
4298 goto fail1;
4299 quotient = expand_shift
4300 (RSHIFT_EXPR, compute_mode, t2,
4301 post_shift, tquotient, 1);
4305 else /* Too wide mode to use tricky code */
4306 break;
4308 insn = get_last_insn ();
4309 if (insn != last)
4310 set_dst_reg_note (insn, REG_EQUAL,
4311 gen_rtx_UDIV (compute_mode, op0, op1),
4312 quotient);
4314 else /* TRUNC_DIV, signed */
4316 unsigned HOST_WIDE_INT ml;
4317 int lgup, post_shift;
4318 rtx mlr;
4319 HOST_WIDE_INT d = INTVAL (op1);
4320 unsigned HOST_WIDE_INT abs_d;
4322 /* Since d might be INT_MIN, we have to cast to
4323 unsigned HOST_WIDE_INT before negating to avoid
4324 undefined signed overflow. */
4325 abs_d = (d >= 0
4326 ? (unsigned HOST_WIDE_INT) d
4327 : - (unsigned HOST_WIDE_INT) d);
4329 /* n rem d = n rem -d */
4330 if (rem_flag && d < 0)
4332 d = abs_d;
4333 op1 = gen_int_mode (abs_d, compute_mode);
4336 if (d == 1)
4337 quotient = op0;
4338 else if (d == -1)
4339 quotient = expand_unop (compute_mode, neg_optab, op0,
4340 tquotient, 0);
4341 else if (size <= HOST_BITS_PER_WIDE_INT
4342 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4344 /* This case is not handled correctly below. */
4345 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4346 compute_mode, 1, 1);
4347 if (quotient == 0)
4348 goto fail1;
4350 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4351 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4352 && (rem_flag
4353 ? smod_pow2_cheap (speed, compute_mode)
4354 : sdiv_pow2_cheap (speed, compute_mode))
4355 /* We assume that cheap metric is true if the
4356 optab has an expander for this mode. */
4357 && ((optab_handler ((rem_flag ? smod_optab
4358 : sdiv_optab),
4359 compute_mode)
4360 != CODE_FOR_nothing)
4361 || (optab_handler (sdivmod_optab,
4362 compute_mode)
4363 != CODE_FOR_nothing)))
4365 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)
4366 && (size <= HOST_BITS_PER_WIDE_INT
4367 || abs_d != (unsigned HOST_WIDE_INT) d))
4369 if (rem_flag)
4371 remainder = expand_smod_pow2 (compute_mode, op0, d);
4372 if (remainder)
4373 return gen_lowpart (mode, remainder);
4376 if (sdiv_pow2_cheap (speed, compute_mode)
4377 && ((optab_handler (sdiv_optab, compute_mode)
4378 != CODE_FOR_nothing)
4379 || (optab_handler (sdivmod_optab, compute_mode)
4380 != CODE_FOR_nothing)))
4381 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4382 compute_mode, op0,
4383 gen_int_mode (abs_d,
4384 compute_mode),
4385 NULL_RTX, 0);
4386 else
4387 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4389 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4390 negate the quotient. */
4391 if (d < 0)
4393 insn = get_last_insn ();
4394 if (insn != last
4395 && abs_d < (HOST_WIDE_INT_1U
4396 << (HOST_BITS_PER_WIDE_INT - 1)))
4397 set_dst_reg_note (insn, REG_EQUAL,
4398 gen_rtx_DIV (compute_mode, op0,
4399 gen_int_mode
4400 (abs_d,
4401 compute_mode)),
4402 quotient);
4404 quotient = expand_unop (compute_mode, neg_optab,
4405 quotient, quotient, 0);
4408 else if (size <= HOST_BITS_PER_WIDE_INT)
4410 choose_multiplier (abs_d, size, size - 1,
4411 &ml, &post_shift, &lgup);
4412 if (ml < HOST_WIDE_INT_1U << (size - 1))
4414 rtx t1, t2, t3;
4416 if (post_shift >= BITS_PER_WORD
4417 || size - 1 >= BITS_PER_WORD)
4418 goto fail1;
4420 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4421 + shift_cost (speed, compute_mode, size - 1)
4422 + add_cost (speed, compute_mode));
4423 t1 = expmed_mult_highpart
4424 (compute_mode, op0, gen_int_mode (ml, compute_mode),
4425 NULL_RTX, 0, max_cost - extra_cost);
4426 if (t1 == 0)
4427 goto fail1;
4428 t2 = expand_shift
4429 (RSHIFT_EXPR, compute_mode, t1,
4430 post_shift, NULL_RTX, 0);
4431 t3 = expand_shift
4432 (RSHIFT_EXPR, compute_mode, op0,
4433 size - 1, NULL_RTX, 0);
4434 if (d < 0)
4435 quotient
4436 = force_operand (gen_rtx_MINUS (compute_mode,
4437 t3, t2),
4438 tquotient);
4439 else
4440 quotient
4441 = force_operand (gen_rtx_MINUS (compute_mode,
4442 t2, t3),
4443 tquotient);
4445 else
4447 rtx t1, t2, t3, t4;
4449 if (post_shift >= BITS_PER_WORD
4450 || size - 1 >= BITS_PER_WORD)
4451 goto fail1;
4453 ml |= HOST_WIDE_INT_M1U << (size - 1);
4454 mlr = gen_int_mode (ml, compute_mode);
4455 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4456 + shift_cost (speed, compute_mode, size - 1)
4457 + 2 * add_cost (speed, compute_mode));
4458 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4459 NULL_RTX, 0,
4460 max_cost - extra_cost);
4461 if (t1 == 0)
4462 goto fail1;
4463 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4464 t1, op0),
4465 NULL_RTX);
4466 t3 = expand_shift
4467 (RSHIFT_EXPR, compute_mode, t2,
4468 post_shift, NULL_RTX, 0);
4469 t4 = expand_shift
4470 (RSHIFT_EXPR, compute_mode, op0,
4471 size - 1, NULL_RTX, 0);
4472 if (d < 0)
4473 quotient
4474 = force_operand (gen_rtx_MINUS (compute_mode,
4475 t4, t3),
4476 tquotient);
4477 else
4478 quotient
4479 = force_operand (gen_rtx_MINUS (compute_mode,
4480 t3, t4),
4481 tquotient);
4484 else /* Too wide mode to use tricky code */
4485 break;
4487 insn = get_last_insn ();
4488 if (insn != last)
4489 set_dst_reg_note (insn, REG_EQUAL,
4490 gen_rtx_DIV (compute_mode, op0, op1),
4491 quotient);
4493 break;
4495 fail1:
4496 delete_insns_since (last);
4497 break;
4499 case FLOOR_DIV_EXPR:
4500 case FLOOR_MOD_EXPR:
4501 /* We will come here only for signed operations. */
4502 if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4504 unsigned HOST_WIDE_INT mh, ml;
4505 int pre_shift, lgup, post_shift;
4506 HOST_WIDE_INT d = INTVAL (op1);
4508 if (d > 0)
4510 /* We could just as easily deal with negative constants here,
4511 but it does not seem worth the trouble for GCC 2.6. */
4512 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4514 pre_shift = floor_log2 (d);
4515 if (rem_flag)
4517 unsigned HOST_WIDE_INT mask
4518 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4519 remainder = expand_binop
4520 (compute_mode, and_optab, op0,
4521 gen_int_mode (mask, compute_mode),
4522 remainder, 0, OPTAB_LIB_WIDEN);
4523 if (remainder)
4524 return gen_lowpart (mode, remainder);
4526 quotient = expand_shift
4527 (RSHIFT_EXPR, compute_mode, op0,
4528 pre_shift, tquotient, 0);
4530 else
4532 rtx t1, t2, t3, t4;
4534 mh = choose_multiplier (d, size, size - 1,
4535 &ml, &post_shift, &lgup);
4536 gcc_assert (!mh);
4538 if (post_shift < BITS_PER_WORD
4539 && size - 1 < BITS_PER_WORD)
4541 t1 = expand_shift
4542 (RSHIFT_EXPR, compute_mode, op0,
4543 size - 1, NULL_RTX, 0);
4544 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4545 NULL_RTX, 0, OPTAB_WIDEN);
4546 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4547 + shift_cost (speed, compute_mode, size - 1)
4548 + 2 * add_cost (speed, compute_mode));
4549 t3 = expmed_mult_highpart
4550 (compute_mode, t2, gen_int_mode (ml, compute_mode),
4551 NULL_RTX, 1, max_cost - extra_cost);
4552 if (t3 != 0)
4554 t4 = expand_shift
4555 (RSHIFT_EXPR, compute_mode, t3,
4556 post_shift, NULL_RTX, 1);
4557 quotient = expand_binop (compute_mode, xor_optab,
4558 t4, t1, tquotient, 0,
4559 OPTAB_WIDEN);
4564 else
4566 rtx nsign, t1, t2, t3, t4;
4567 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4568 op0, constm1_rtx), NULL_RTX);
4569 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4570 0, OPTAB_WIDEN);
4571 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
4572 size - 1, NULL_RTX, 0);
4573 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4574 NULL_RTX);
4575 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4576 NULL_RTX, 0);
4577 if (t4)
4579 rtx t5;
4580 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4581 NULL_RTX, 0);
4582 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4583 t4, t5),
4584 tquotient);
4589 if (quotient != 0)
4590 break;
4591 delete_insns_since (last);
4593 /* Try using an instruction that produces both the quotient and
4594 remainder, using truncation. We can easily compensate the quotient
4595 or remainder to get floor rounding, once we have the remainder.
4596 Notice that we also compute the final remainder value here,
4597 and return the result right away. */
4598 if (target == 0 || GET_MODE (target) != compute_mode)
4599 target = gen_reg_rtx (compute_mode);
4601 if (rem_flag)
4603 remainder
4604 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4605 quotient = gen_reg_rtx (compute_mode);
4607 else
4609 quotient
4610 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4611 remainder = gen_reg_rtx (compute_mode);
4614 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4615 quotient, remainder, 0))
4617 /* This could be computed with a branch-less sequence.
4618 Save that for later. */
4619 rtx tem;
4620 rtx_code_label *label = gen_label_rtx ();
4621 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4622 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4623 NULL_RTX, 0, OPTAB_WIDEN);
4624 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4625 expand_dec (quotient, const1_rtx);
4626 expand_inc (remainder, op1);
4627 emit_label (label);
4628 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4631 /* No luck with division elimination or divmod. Have to do it
4632 by conditionally adjusting op0 *and* the result. */
4634 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4635 rtx adjusted_op0;
4636 rtx tem;
4638 quotient = gen_reg_rtx (compute_mode);
4639 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4640 label1 = gen_label_rtx ();
4641 label2 = gen_label_rtx ();
4642 label3 = gen_label_rtx ();
4643 label4 = gen_label_rtx ();
4644 label5 = gen_label_rtx ();
4645 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4646 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4647 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4648 quotient, 0, OPTAB_LIB_WIDEN);
4649 if (tem != quotient)
4650 emit_move_insn (quotient, tem);
4651 emit_jump_insn (targetm.gen_jump (label5));
4652 emit_barrier ();
4653 emit_label (label1);
4654 expand_inc (adjusted_op0, const1_rtx);
4655 emit_jump_insn (targetm.gen_jump (label4));
4656 emit_barrier ();
4657 emit_label (label2);
4658 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4659 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4660 quotient, 0, OPTAB_LIB_WIDEN);
4661 if (tem != quotient)
4662 emit_move_insn (quotient, tem);
4663 emit_jump_insn (targetm.gen_jump (label5));
4664 emit_barrier ();
4665 emit_label (label3);
4666 expand_dec (adjusted_op0, const1_rtx);
4667 emit_label (label4);
4668 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4669 quotient, 0, OPTAB_LIB_WIDEN);
4670 if (tem != quotient)
4671 emit_move_insn (quotient, tem);
4672 expand_dec (quotient, const1_rtx);
4673 emit_label (label5);
4675 break;
4677 case CEIL_DIV_EXPR:
4678 case CEIL_MOD_EXPR:
4679 if (unsignedp)
4681 if (op1_is_constant
4682 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4683 && (size <= HOST_BITS_PER_WIDE_INT
4684 || INTVAL (op1) >= 0))
4686 rtx t1, t2, t3;
4687 unsigned HOST_WIDE_INT d = INTVAL (op1);
4688 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4689 floor_log2 (d), tquotient, 1);
4690 t2 = expand_binop (compute_mode, and_optab, op0,
4691 gen_int_mode (d - 1, compute_mode),
4692 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4693 t3 = gen_reg_rtx (compute_mode);
4694 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4695 compute_mode, 1, 1);
4696 if (t3 == 0)
4698 rtx_code_label *lab;
4699 lab = gen_label_rtx ();
4700 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4701 expand_inc (t1, const1_rtx);
4702 emit_label (lab);
4703 quotient = t1;
4705 else
4706 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4707 t1, t3),
4708 tquotient);
4709 break;
4712 /* Try using an instruction that produces both the quotient and
4713 remainder, using truncation. We can easily compensate the
4714 quotient or remainder to get ceiling rounding, once we have the
4715 remainder. Notice that we also compute the final remainder
4716 value here, and return the result right away. */
4717 if (target == 0 || GET_MODE (target) != compute_mode)
4718 target = gen_reg_rtx (compute_mode);
4720 if (rem_flag)
4722 remainder = (REG_P (target)
4723 ? target : gen_reg_rtx (compute_mode));
4724 quotient = gen_reg_rtx (compute_mode);
4726 else
4728 quotient = (REG_P (target)
4729 ? target : gen_reg_rtx (compute_mode));
4730 remainder = gen_reg_rtx (compute_mode);
4733 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4734 remainder, 1))
4736 /* This could be computed with a branch-less sequence.
4737 Save that for later. */
4738 rtx_code_label *label = gen_label_rtx ();
4739 do_cmp_and_jump (remainder, const0_rtx, EQ,
4740 compute_mode, label);
4741 expand_inc (quotient, const1_rtx);
4742 expand_dec (remainder, op1);
4743 emit_label (label);
4744 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4747 /* No luck with division elimination or divmod. Have to do it
4748 by conditionally adjusting op0 *and* the result. */
4750 rtx_code_label *label1, *label2;
4751 rtx adjusted_op0, tem;
4753 quotient = gen_reg_rtx (compute_mode);
4754 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4755 label1 = gen_label_rtx ();
4756 label2 = gen_label_rtx ();
4757 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4758 compute_mode, label1);
4759 emit_move_insn (quotient, const0_rtx);
4760 emit_jump_insn (targetm.gen_jump (label2));
4761 emit_barrier ();
4762 emit_label (label1);
4763 expand_dec (adjusted_op0, const1_rtx);
4764 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4765 quotient, 1, OPTAB_LIB_WIDEN);
4766 if (tem != quotient)
4767 emit_move_insn (quotient, tem);
4768 expand_inc (quotient, const1_rtx);
4769 emit_label (label2);
4772 else /* signed */
4774 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4775 && INTVAL (op1) >= 0)
4777 /* This is extremely similar to the code for the unsigned case
4778 above. For 2.7 we should merge these variants, but for
4779 2.6.1 I don't want to touch the code for unsigned since that
4780 gets used in C. The signed case will only be used by other
4781 languages (Ada). */
4783 rtx t1, t2, t3;
4784 unsigned HOST_WIDE_INT d = INTVAL (op1);
4785 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4786 floor_log2 (d), tquotient, 0);
4787 t2 = expand_binop (compute_mode, and_optab, op0,
4788 gen_int_mode (d - 1, compute_mode),
4789 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4790 t3 = gen_reg_rtx (compute_mode);
4791 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4792 compute_mode, 1, 1);
4793 if (t3 == 0)
4795 rtx_code_label *lab;
4796 lab = gen_label_rtx ();
4797 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4798 expand_inc (t1, const1_rtx);
4799 emit_label (lab);
4800 quotient = t1;
4802 else
4803 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4804 t1, t3),
4805 tquotient);
4806 break;
4809 /* Try using an instruction that produces both the quotient and
4810 remainder, using truncation. We can easily compensate the
4811 quotient or remainder to get ceiling rounding, once we have the
4812 remainder. Notice that we also compute the final remainder
4813 value here, and return the result right away. */
4814 if (target == 0 || GET_MODE (target) != compute_mode)
4815 target = gen_reg_rtx (compute_mode);
4816 if (rem_flag)
4818 remainder = (REG_P (target)
4819 ? target : gen_reg_rtx (compute_mode));
4820 quotient = gen_reg_rtx (compute_mode);
4822 else
4824 quotient = (REG_P (target)
4825 ? target : gen_reg_rtx (compute_mode));
4826 remainder = gen_reg_rtx (compute_mode);
4829 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4830 remainder, 0))
4832 /* This could be computed with a branch-less sequence.
4833 Save that for later. */
4834 rtx tem;
4835 rtx_code_label *label = gen_label_rtx ();
4836 do_cmp_and_jump (remainder, const0_rtx, EQ,
4837 compute_mode, label);
4838 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4839 NULL_RTX, 0, OPTAB_WIDEN);
4840 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4841 expand_inc (quotient, const1_rtx);
4842 expand_dec (remainder, op1);
4843 emit_label (label);
4844 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4847 /* No luck with division elimination or divmod. Have to do it
4848 by conditionally adjusting op0 *and* the result. */
4850 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4851 rtx adjusted_op0;
4852 rtx tem;
4854 quotient = gen_reg_rtx (compute_mode);
4855 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4856 label1 = gen_label_rtx ();
4857 label2 = gen_label_rtx ();
4858 label3 = gen_label_rtx ();
4859 label4 = gen_label_rtx ();
4860 label5 = gen_label_rtx ();
4861 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4862 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4863 compute_mode, label1);
4864 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4865 quotient, 0, OPTAB_LIB_WIDEN);
4866 if (tem != quotient)
4867 emit_move_insn (quotient, tem);
4868 emit_jump_insn (targetm.gen_jump (label5));
4869 emit_barrier ();
4870 emit_label (label1);
4871 expand_dec (adjusted_op0, const1_rtx);
4872 emit_jump_insn (targetm.gen_jump (label4));
4873 emit_barrier ();
4874 emit_label (label2);
4875 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4876 compute_mode, label3);
4877 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4878 quotient, 0, OPTAB_LIB_WIDEN);
4879 if (tem != quotient)
4880 emit_move_insn (quotient, tem);
4881 emit_jump_insn (targetm.gen_jump (label5));
4882 emit_barrier ();
4883 emit_label (label3);
4884 expand_inc (adjusted_op0, const1_rtx);
4885 emit_label (label4);
4886 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4887 quotient, 0, OPTAB_LIB_WIDEN);
4888 if (tem != quotient)
4889 emit_move_insn (quotient, tem);
4890 expand_inc (quotient, const1_rtx);
4891 emit_label (label5);
4894 break;
4896 case EXACT_DIV_EXPR:
4897 if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4899 HOST_WIDE_INT d = INTVAL (op1);
4900 unsigned HOST_WIDE_INT ml;
4901 int pre_shift;
4902 rtx t1;
4904 pre_shift = ctz_or_zero (d);
4905 ml = invert_mod2n (d >> pre_shift, size);
4906 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4907 pre_shift, NULL_RTX, unsignedp);
4908 quotient = expand_mult (compute_mode, t1,
4909 gen_int_mode (ml, compute_mode),
4910 NULL_RTX, 1);
4912 insn = get_last_insn ();
4913 set_dst_reg_note (insn, REG_EQUAL,
4914 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4915 compute_mode, op0, op1),
4916 quotient);
4918 break;
4920 case ROUND_DIV_EXPR:
4921 case ROUND_MOD_EXPR:
4922 if (unsignedp)
4924 rtx tem;
4925 rtx_code_label *label;
4926 label = gen_label_rtx ();
4927 quotient = gen_reg_rtx (compute_mode);
4928 remainder = gen_reg_rtx (compute_mode);
4929 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4931 rtx tem;
4932 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4933 quotient, 1, OPTAB_LIB_WIDEN);
4934 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4935 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4936 remainder, 1, OPTAB_LIB_WIDEN);
4938 tem = plus_constant (compute_mode, op1, -1);
4939 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4940 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4941 expand_inc (quotient, const1_rtx);
4942 expand_dec (remainder, op1);
4943 emit_label (label);
4945 else
4947 rtx abs_rem, abs_op1, tem, mask;
4948 rtx_code_label *label;
4949 label = gen_label_rtx ();
4950 quotient = gen_reg_rtx (compute_mode);
4951 remainder = gen_reg_rtx (compute_mode);
4952 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4954 rtx tem;
4955 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4956 quotient, 0, OPTAB_LIB_WIDEN);
4957 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4958 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4959 remainder, 0, OPTAB_LIB_WIDEN);
4961 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4962 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4963 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4964 1, NULL_RTX, 1);
4965 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4966 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4967 NULL_RTX, 0, OPTAB_WIDEN);
4968 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4969 size - 1, NULL_RTX, 0);
4970 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4971 NULL_RTX, 0, OPTAB_WIDEN);
4972 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4973 NULL_RTX, 0, OPTAB_WIDEN);
4974 expand_inc (quotient, tem);
4975 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4976 NULL_RTX, 0, OPTAB_WIDEN);
4977 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4978 NULL_RTX, 0, OPTAB_WIDEN);
4979 expand_dec (remainder, tem);
4980 emit_label (label);
4982 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4984 default:
4985 gcc_unreachable ();
4988 if (quotient == 0)
4990 if (target && GET_MODE (target) != compute_mode)
4991 target = 0;
4993 if (rem_flag)
4995 /* Try to produce the remainder without producing the quotient.
4996 If we seem to have a divmod pattern that does not require widening,
4997 don't try widening here. We should really have a WIDEN argument
4998 to expand_twoval_binop, since what we'd really like to do here is
4999 1) try a mod insn in compute_mode
5000 2) try a divmod insn in compute_mode
5001 3) try a div insn in compute_mode and multiply-subtract to get
5002 remainder
5003 4) try the same things with widening allowed. */
5004 remainder
5005 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5006 op0, op1, target,
5007 unsignedp,
5008 ((optab_handler (optab2, compute_mode)
5009 != CODE_FOR_nothing)
5010 ? OPTAB_DIRECT : OPTAB_WIDEN));
5011 if (remainder == 0)
5013 /* No luck there. Can we do remainder and divide at once
5014 without a library call? */
5015 remainder = gen_reg_rtx (compute_mode);
5016 if (! expand_twoval_binop ((unsignedp
5017 ? udivmod_optab
5018 : sdivmod_optab),
5019 op0, op1,
5020 NULL_RTX, remainder, unsignedp))
5021 remainder = 0;
5024 if (remainder)
5025 return gen_lowpart (mode, remainder);
5028 /* Produce the quotient. Try a quotient insn, but not a library call.
5029 If we have a divmod in this mode, use it in preference to widening
5030 the div (for this test we assume it will not fail). Note that optab2
5031 is set to the one of the two optabs that the call below will use. */
5032 quotient
5033 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5034 op0, op1, rem_flag ? NULL_RTX : target,
5035 unsignedp,
5036 ((optab_handler (optab2, compute_mode)
5037 != CODE_FOR_nothing)
5038 ? OPTAB_DIRECT : OPTAB_WIDEN));
5040 if (quotient == 0)
5042 /* No luck there. Try a quotient-and-remainder insn,
5043 keeping the quotient alone. */
5044 quotient = gen_reg_rtx (compute_mode);
5045 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5046 op0, op1,
5047 quotient, NULL_RTX, unsignedp))
5049 quotient = 0;
5050 if (! rem_flag)
5051 /* Still no luck. If we are not computing the remainder,
5052 use a library call for the quotient. */
5053 quotient = sign_expand_binop (compute_mode,
5054 udiv_optab, sdiv_optab,
5055 op0, op1, target,
5056 unsignedp, OPTAB_LIB_WIDEN);
5061 if (rem_flag)
5063 if (target && GET_MODE (target) != compute_mode)
5064 target = 0;
5066 if (quotient == 0)
5068 /* No divide instruction either. Use library for remainder. */
5069 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5070 op0, op1, target,
5071 unsignedp, OPTAB_LIB_WIDEN);
5072 /* No remainder function. Try a quotient-and-remainder
5073 function, keeping the remainder. */
5074 if (!remainder)
5076 remainder = gen_reg_rtx (compute_mode);
5077 if (!expand_twoval_binop_libfunc
5078 (unsignedp ? udivmod_optab : sdivmod_optab,
5079 op0, op1,
5080 NULL_RTX, remainder,
5081 unsignedp ? UMOD : MOD))
5082 remainder = NULL_RTX;
5085 else
5087 /* We divided. Now finish doing X - Y * (X / Y). */
5088 remainder = expand_mult (compute_mode, quotient, op1,
5089 NULL_RTX, unsignedp);
5090 remainder = expand_binop (compute_mode, sub_optab, op0,
5091 remainder, target, unsignedp,
5092 OPTAB_LIB_WIDEN);
5096 return gen_lowpart (mode, rem_flag ? remainder : quotient);
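/* Illustrative summary (not part of the original source): once a
   truncating quotient Q and remainder R = OP0 - Q * OP1 are available,
   the rounding modes handled above reduce to small corrections.  A C
   sketch for signed floor division, mirroring the FLOOR_DIV_EXPR fix-up
   emitted after the divmod (assuming the division itself does not
   overflow):

     #include <stdint.h>

     int32_t floor_div (int32_t a, int32_t b)
     {
       int32_t q = a / b;             // truncating quotient
       int32_t r = a % b;             // truncating remainder
       if (r != 0 && ((a ^ b) < 0))   // nonzero remainder, operands differ in sign
         q -= 1;                      // (the remainder, if wanted, gains +b)
       return q;
     }

   CEIL_DIV_EXPR adds 1 instead when the signs agree (always, for
   unsigned), and ROUND_DIV_EXPR compares 2*|R| against |OP1|.  */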
5099 /* Return a tree node with data type TYPE, describing the value of X.
5100 Usually this is a VAR_DECL, if there is no obvious better choice.
5101 X may be an expression; however, we only support those expressions
5102 generated by loop.c. */
5104 tree
5105 make_tree (tree type, rtx x)
5107 tree t;
5109 switch (GET_CODE (x))
5111 case CONST_INT:
5112 case CONST_WIDE_INT:
5113 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5114 return t;
5116 case CONST_DOUBLE:
5117 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5118 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5119 t = wide_int_to_tree (type,
5120 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5121 HOST_BITS_PER_WIDE_INT * 2));
5122 else
5123 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5125 return t;
5127 case CONST_VECTOR:
5129 int units = CONST_VECTOR_NUNITS (x);
5130 tree itype = TREE_TYPE (type);
5131 tree *elts;
5132 int i;
5134 /* Build a tree with vector elements. */
5135 elts = XALLOCAVEC (tree, units);
5136 for (i = units - 1; i >= 0; --i)
5138 rtx elt = CONST_VECTOR_ELT (x, i);
5139 elts[i] = make_tree (itype, elt);
5142 return build_vector (type, elts);
5145 case PLUS:
5146 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5147 make_tree (type, XEXP (x, 1)));
5149 case MINUS:
5150 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5151 make_tree (type, XEXP (x, 1)));
5153 case NEG:
5154 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5156 case MULT:
5157 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5158 make_tree (type, XEXP (x, 1)));
5160 case ASHIFT:
5161 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5162 make_tree (type, XEXP (x, 1)));
5164 case LSHIFTRT:
5165 t = unsigned_type_for (type);
5166 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5167 make_tree (t, XEXP (x, 0)),
5168 make_tree (type, XEXP (x, 1))));
5170 case ASHIFTRT:
5171 t = signed_type_for (type);
5172 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5173 make_tree (t, XEXP (x, 0)),
5174 make_tree (type, XEXP (x, 1))));
5176 case DIV:
5177 if (TREE_CODE (type) != REAL_TYPE)
5178 t = signed_type_for (type);
5179 else
5180 t = type;
5182 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5183 make_tree (t, XEXP (x, 0)),
5184 make_tree (t, XEXP (x, 1))));
5185 case UDIV:
5186 t = unsigned_type_for (type);
5187 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5188 make_tree (t, XEXP (x, 0)),
5189 make_tree (t, XEXP (x, 1))));
5191 case SIGN_EXTEND:
5192 case ZERO_EXTEND:
5193 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5194 GET_CODE (x) == ZERO_EXTEND);
5195 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5197 case CONST:
5198 return make_tree (type, XEXP (x, 0));
5200 case SYMBOL_REF:
5201 t = SYMBOL_REF_DECL (x);
5202 if (t)
5203 return fold_convert (type, build_fold_addr_expr (t));
5204 /* fall through. */
5206 default:
5207 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5209 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5210 address mode to pointer mode. */
5211 if (POINTER_TYPE_P (type))
5212 x = convert_memory_address_addr_space
5213 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5215 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5216 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5217 t->decl_with_rtl.rtl = x;
5219 return t;
5223 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5224 and returning TARGET.
5226 If TARGET is 0, a pseudo-register or constant is returned. */
5229 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5231 rtx tem = 0;
5233 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5234 tem = simplify_binary_operation (AND, mode, op0, op1);
5235 if (tem == 0)
5236 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5238 if (target == 0)
5239 target = tem;
5240 else if (tem != target)
5241 emit_move_insn (target, tem);
5242 return target;
5245 /* Helper function for emit_store_flag. */
5247 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5248 machine_mode mode, machine_mode compare_mode,
5249 int unsignedp, rtx x, rtx y, int normalizep,
5250 machine_mode target_mode)
5252 struct expand_operand ops[4];
5253 rtx op0, comparison, subtarget;
5254 rtx_insn *last;
5255 machine_mode result_mode = targetm.cstore_mode (icode);
5257 last = get_last_insn ();
5258 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5259 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5260 if (!x || !y)
5262 delete_insns_since (last);
5263 return NULL_RTX;
5266 if (target_mode == VOIDmode)
5267 target_mode = result_mode;
5268 if (!target)
5269 target = gen_reg_rtx (target_mode);
5271 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5273 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5274 create_fixed_operand (&ops[1], comparison);
5275 create_fixed_operand (&ops[2], x);
5276 create_fixed_operand (&ops[3], y);
5277 if (!maybe_expand_insn (icode, 4, ops))
5279 delete_insns_since (last);
5280 return NULL_RTX;
5282 subtarget = ops[0].value;
5284 /* If we are converting to a wider mode, first convert to
5285 TARGET_MODE, then normalize. This produces better combining
5286 opportunities on machines that have a SIGN_EXTRACT when we are
5287 testing a single bit. This mostly benefits the 68k.
5289 If STORE_FLAG_VALUE does not have the sign bit set when
5290 interpreted in MODE, we can do this conversion as unsigned, which
5291 is usually more efficient. */
5292 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5294 convert_move (target, subtarget,
5295 val_signbit_known_clear_p (result_mode,
5296 STORE_FLAG_VALUE));
5297 op0 = target;
5298 result_mode = target_mode;
5300 else
5301 op0 = subtarget;
5303 /* If we want to keep subexpressions around, don't reuse our last
5304 target. */
5305 if (optimize)
5306 subtarget = 0;
5308 /* Now normalize to the proper value in MODE. Sometimes we don't
5309 have to do anything. */
5310 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5312 /* STORE_FLAG_VALUE might be the most negative number, so write
5313 the comparison this way to avoid a compile-time warning. */
5314 else if (- normalizep == STORE_FLAG_VALUE)
5315 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5317 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5318 it hard to use a value of just the sign bit due to ANSI integer
5319 constant typing rules. */
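/* Illustrative example, assuming a target whose store-flag result is the
   sign-bit value: shifting that bit down to bit 0 yields 0/1 when the shift
   is logical (NORMALIZEP == 1) and 0/-1 when it is arithmetic. */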
5320 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5321 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5322 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5323 normalizep == 1);
5324 else
5326 gcc_assert (STORE_FLAG_VALUE & 1);
5328 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5329 if (normalizep == -1)
5330 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5333 /* If we were converting to a smaller mode, do the conversion now. */
5334 if (target_mode != result_mode)
5336 convert_move (target, op0, 0);
5337 return target;
5339 else
5340 return op0;
5344 /* A subroutine of emit_store_flag only including "tricks" that do not
5345 need a recursive call. These are kept separate to avoid infinite
5346 loops. */
5348 static rtx
5349 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5350 machine_mode mode, int unsignedp, int normalizep,
5351 machine_mode target_mode)
5353 rtx subtarget;
5354 enum insn_code icode;
5355 machine_mode compare_mode;
5356 enum mode_class mclass;
5357 enum rtx_code scode;
5359 if (unsignedp)
5360 code = unsigned_condition (code);
5361 scode = swap_condition (code);
5363 /* If one operand is constant, make it the second one. Only do this
5364 if the other operand is not constant as well. */
5366 if (swap_commutative_operands_p (op0, op1))
5368 std::swap (op0, op1);
5369 code = swap_condition (code);
5372 if (mode == VOIDmode)
5373 mode = GET_MODE (op0);
5375 /* For some comparisons with 1 and -1, we can convert this to
5376 comparisons with zero. This will often produce more opportunities for
5377 store-flag insns. */
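/* For example, "x < 1" becomes "x <= 0" and the unsigned "x >= 1" becomes
   "x != 0". */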
5379 switch (code)
5381 case LT:
5382 if (op1 == const1_rtx)
5383 op1 = const0_rtx, code = LE;
5384 break;
5385 case LE:
5386 if (op1 == constm1_rtx)
5387 op1 = const0_rtx, code = LT;
5388 break;
5389 case GE:
5390 if (op1 == const1_rtx)
5391 op1 = const0_rtx, code = GT;
5392 break;
5393 case GT:
5394 if (op1 == constm1_rtx)
5395 op1 = const0_rtx, code = GE;
5396 break;
5397 case GEU:
5398 if (op1 == const1_rtx)
5399 op1 = const0_rtx, code = NE;
5400 break;
5401 case LTU:
5402 if (op1 == const1_rtx)
5403 op1 = const0_rtx, code = EQ;
5404 break;
5405 default:
5406 break;
5409 /* If we are comparing a double-word integer with zero or -1, we can
5410 convert the comparison into one involving a single word. */
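/* For instance, a DImode "x == 0" on a 32-bit word target ORs the two
   SImode halves and tests the single-word result, while "x == -1" ANDs the
   halves and compares against -1. */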
5411 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5412 && GET_MODE_CLASS (mode) == MODE_INT
5413 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5415 rtx tem;
5416 if ((code == EQ || code == NE)
5417 && (op1 == const0_rtx || op1 == constm1_rtx))
5419 rtx op00, op01;
5421 /* Do a logical OR or AND of the two words and compare the
5422 result. */
5423 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5424 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5425 tem = expand_binop (word_mode,
5426 op1 == const0_rtx ? ior_optab : and_optab,
5427 op00, op01, NULL_RTX, unsignedp,
5428 OPTAB_DIRECT);
5430 if (tem != 0)
5431 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5432 unsignedp, normalizep);
5434 else if ((code == LT || code == GE) && op1 == const0_rtx)
5436 rtx op0h;
5438 /* If testing the sign bit, can just test on high word. */
5439 op0h = simplify_gen_subreg (word_mode, op0, mode,
5440 subreg_highpart_offset (word_mode,
5441 mode));
5442 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5443 unsignedp, normalizep);
5445 else
5446 tem = NULL_RTX;
5448 if (tem)
5450 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5451 return tem;
5452 if (!target)
5453 target = gen_reg_rtx (target_mode);
5455 convert_move (target, tem,
5456 !val_signbit_known_set_p (word_mode,
5457 (normalizep ? normalizep
5458 : STORE_FLAG_VALUE)));
5459 return target;
5463 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5464 complement of A (for GE) and shifting the sign bit to the low bit. */
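/* For example, "A < 0" reduces to shifting A right by
   GET_MODE_BITSIZE (mode) - 1: a logical shift gives 0/1, an arithmetic
   shift gives 0/-1. For "A >= 0" the operand is one's-complemented first,
   inverting the sign bit. */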
5465 if (op1 == const0_rtx && (code == LT || code == GE)
5466 && GET_MODE_CLASS (mode) == MODE_INT
5467 && (normalizep || STORE_FLAG_VALUE == 1
5468 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5470 subtarget = target;
5472 if (!target)
5473 target_mode = mode;
5475 /* If the result is to be wider than OP0, it is best to convert it
5476 first. If it is to be narrower, it is *incorrect* to convert it
5477 first. */
5478 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5480 op0 = convert_modes (target_mode, mode, op0, 0);
5481 mode = target_mode;
5484 if (target_mode != mode)
5485 subtarget = 0;
5487 if (code == GE)
5488 op0 = expand_unop (mode, one_cmpl_optab, op0,
5489 ((STORE_FLAG_VALUE == 1 || normalizep)
5490 ? 0 : subtarget), 0);
5492 if (STORE_FLAG_VALUE == 1 || normalizep)
5493 /* If we are supposed to produce a 0/1 value, we want to do
5494 a logical shift from the sign bit to the low-order bit; for
5495 a -1/0 value, we do an arithmetic shift. */
5496 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5497 GET_MODE_BITSIZE (mode) - 1,
5498 subtarget, normalizep != -1);
5500 if (mode != target_mode)
5501 op0 = convert_modes (target_mode, mode, op0, 0);
5503 return op0;
5506 mclass = GET_MODE_CLASS (mode);
5507 for (compare_mode = mode; compare_mode != VOIDmode;
5508 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5510 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5511 icode = optab_handler (cstore_optab, optab_mode);
5512 if (icode != CODE_FOR_nothing)
5514 do_pending_stack_adjust ();
5515 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5516 unsignedp, op0, op1, normalizep, target_mode);
5517 if (tem)
5518 return tem;
5520 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5522 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5523 unsignedp, op1, op0, normalizep, target_mode);
5524 if (tem)
5525 return tem;
5527 break;
5531 return 0;
5534 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5535 and storing in TARGET. Normally return TARGET.
5536 Return 0 if that cannot be done.
5538 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5539 it is VOIDmode, they cannot both be CONST_INT.
5541 UNSIGNEDP is for the case where we have to widen the operands
5542 to perform the operation. It says to use zero-extension.
5544 NORMALIZEP is 1 if we should convert the result to be either zero
5545 or one. NORMALIZEP is -1 if we should convert the result to be
5546 either zero or -1. If NORMALIZEP is zero, the result will be left
5547 "raw" out of the scc insn. */
5549 rtx
5550 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5551 machine_mode mode, int unsignedp, int normalizep)
5553 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5554 enum rtx_code rcode;
5555 rtx subtarget;
5556 rtx tem, trueval;
5557 rtx_insn *last;
5559 /* If we compare constants, we shouldn't use a store-flag operation,
5560 but a constant load. We can get there via the vanilla route that
5561 usually generates a compare-branch sequence, but will in this case
5562 fold the comparison to a constant, and thus elide the branch. */
5563 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5564 return NULL_RTX;
5566 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5567 target_mode);
5568 if (tem)
5569 return tem;
5571 /* If we reached here, we can't do this with a scc insn, however there
5572 are some comparisons that can be done in other ways. Don't do any
5573 of these cases if branches are very cheap. */
5574 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5575 return 0;
5577 /* See what we need to return. We can only return a 1, -1, or the
5578 sign bit. */
5580 if (normalizep == 0)
5582 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5583 normalizep = STORE_FLAG_VALUE;
5585 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5587 else
5588 return 0;
5591 last = get_last_insn ();
5593 /* If optimizing, use different pseudo registers for each insn, instead
5594 of reusing the same pseudo. This leads to better CSE, but slows
5595 down the compiler, since there are more pseudos. */
5596 subtarget = (!optimize
5597 && (target_mode == mode)) ? target : NULL_RTX;
5598 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5600 /* For floating-point comparisons, try the reverse comparison or try
5601 changing the "orderedness" of the comparison. */
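/* Illustrative note: when only the reversed comparison is available, its
   normalized result is flipped back below; with NORMALIZEP == 1 a reversed
   result of 1 XORed with the true value 1 gives 0, and 0 gives 1, recovering
   the original comparison. */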
5602 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5604 enum rtx_code first_code;
5605 bool and_them;
5607 rcode = reverse_condition_maybe_unordered (code);
5608 if (can_compare_p (rcode, mode, ccp_store_flag)
5609 && (code == ORDERED || code == UNORDERED
5610 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5611 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5613 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5614 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5616 /* For the reverse comparison, use either an addition or a XOR. */
5617 if (want_add
5618 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5619 optimize_insn_for_speed_p ()) == 0)
5621 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5622 STORE_FLAG_VALUE, target_mode);
5623 if (tem)
5624 return expand_binop (target_mode, add_optab, tem,
5625 gen_int_mode (normalizep, target_mode),
5626 target, 0, OPTAB_WIDEN);
5628 else if (!want_add
5629 && rtx_cost (trueval, mode, XOR, 1,
5630 optimize_insn_for_speed_p ()) == 0)
5632 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5633 normalizep, target_mode);
5634 if (tem)
5635 return expand_binop (target_mode, xor_optab, tem, trueval,
5636 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5640 delete_insns_since (last);
5642 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5643 if (code == ORDERED || code == UNORDERED)
5644 return 0;
5646 and_them = split_comparison (code, mode, &first_code, &code);
5648 /* If there are no NaNs, the first comparison should always fall through.
5649 Effectively change the comparison to the other one. */
5650 if (!HONOR_NANS (mode))
5652 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5653 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5654 target_mode);
5657 if (!HAVE_conditional_move)
5658 return 0;
5660 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5661 conditional move. */
5662 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5663 normalizep, target_mode);
5664 if (tem == 0)
5665 return 0;
5667 if (and_them)
5668 tem = emit_conditional_move (target, code, op0, op1, mode,
5669 tem, const0_rtx, GET_MODE (tem), 0);
5670 else
5671 tem = emit_conditional_move (target, code, op0, op1, mode,
5672 trueval, tem, GET_MODE (tem), 0);
5674 if (tem == 0)
5675 delete_insns_since (last);
5676 return tem;
5679 /* The remaining tricks only apply to integer comparisons. */
5681 if (GET_MODE_CLASS (mode) != MODE_INT)
5682 return 0;
5684 /* If this is an equality comparison of integers, we can try to exclusive-or
5685 (or subtract) the two operands and use a recursive call to try the
5686 comparison with zero. Don't do any of these cases if branches are
5687 very cheap. */
5689 if ((code == EQ || code == NE) && op1 != const0_rtx)
5691 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5692 OPTAB_WIDEN);
5694 if (tem == 0)
5695 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5696 OPTAB_WIDEN);
5697 if (tem != 0)
5698 tem = emit_store_flag (target, code, tem, const0_rtx,
5699 mode, unsignedp, normalizep);
5700 if (tem != 0)
5701 return tem;
5703 delete_insns_since (last);
5706 /* For integer comparisons, try the reverse comparison. However, for
5707 small X, and if we'd have to extend anyway, implementing "X != 0"
5708 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5709 rcode = reverse_condition (code);
5710 if (can_compare_p (rcode, mode, ccp_store_flag)
5711 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5712 && code == NE
5713 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5714 && op1 == const0_rtx))
5716 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5717 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5719 /* Again, for the reverse comparison, use either an addition or a XOR. */
5720 if (want_add
5721 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5722 optimize_insn_for_speed_p ()) == 0)
5724 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5725 STORE_FLAG_VALUE, target_mode);
5726 if (tem != 0)
5727 tem = expand_binop (target_mode, add_optab, tem,
5728 gen_int_mode (normalizep, target_mode),
5729 target, 0, OPTAB_WIDEN);
5731 else if (!want_add
5732 && rtx_cost (trueval, mode, XOR, 1,
5733 optimize_insn_for_speed_p ()) == 0)
5735 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5736 normalizep, target_mode);
5737 if (tem != 0)
5738 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5739 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5742 if (tem != 0)
5743 return tem;
5744 delete_insns_since (last);
5747 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5748 the constant zero. Reject all other comparisons at this point. Only
5749 do LE and GT if branches are expensive since they are expensive on
5750 2-operand machines. */
5752 if (op1 != const0_rtx
5753 || (code != EQ && code != NE
5754 && (BRANCH_COST (optimize_insn_for_speed_p (),
5755 false) <= 1 || (code != LE && code != GT))))
5756 return 0;
5758 /* Try to put the result of the comparison in the sign bit. Assume we can't
5759 do the necessary operation below. */
5761 tem = 0;
5763 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5764 the sign bit set. */
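/* Worked 8-bit example: A == 5 gives 5 | 4 = 5 (sign clear, so A > 0),
   while A == 0 gives 0 | 0xff = 0xff and A == -3 gives 0xfd | 0xfc = 0xfd,
   both with the sign bit set. */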
5766 if (code == LE)
5768 /* This is destructive, so SUBTARGET can't be OP0. */
5769 if (rtx_equal_p (subtarget, op0))
5770 subtarget = 0;
5772 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5773 OPTAB_WIDEN);
5774 if (tem)
5775 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5776 OPTAB_WIDEN);
5779 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5780 number of bits in the mode of OP0, minus one. */
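/* Worked 8-bit example: A == 5 gives (5 >> 7) - 5 = 0 - 5 = -5 (sign set,
   so A > 0), while A == 0 gives 0 and A == -3 gives (-1) - (-3) = 2, both
   with the sign bit clear. */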
5782 if (code == GT)
5784 if (rtx_equal_p (subtarget, op0))
5785 subtarget = 0;
5787 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5788 GET_MODE_BITSIZE (mode) - 1,
5789 subtarget, 0);
5790 if (tem)
5791 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5792 OPTAB_WIDEN);
5795 if (code == EQ || code == NE)
5797 /* For EQ or NE, one way to do the comparison is to apply an operation
5798 that converts the operand into a positive number if it is nonzero
5799 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5800 for NE we negate. This puts the result in the sign bit. Then we
5801 normalize with a shift, if needed.
5803 Two operations that can do the above actions are ABS and FFS, so try
5804 them. If that doesn't work, and MODE is smaller than a full word,
5805 we can use zero-extension to the wider mode (an unsigned conversion)
5806 as the operation. */
5808 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5809 that is compensated by the subsequent overflow when subtracting
5810 one / negating. */
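/* Worked example for NE: abs (5) negated is -5 (sign set, so 5 != 0) while
   abs (0) negated stays 0. For EQ, abs (5) - 1 = 4 keeps the sign clear and
   abs (0) - 1 = -1 sets it. */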
5812 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5813 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5814 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5815 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5816 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5818 tem = convert_modes (word_mode, mode, op0, 1);
5819 mode = word_mode;
5822 if (tem != 0)
5824 if (code == EQ)
5825 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5826 0, OPTAB_WIDEN);
5827 else
5828 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5831 /* If we couldn't do it that way, for NE we can "or" the two's complement
5832 of the value with itself. For EQ, we take the one's complement of
5833 that "or", which is an extra insn, so we only handle EQ if branches
5834 are expensive. */
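/* Worked 8-bit example: A == 5 gives -5 | 5 = 0xfb | 0x05 = 0xff (sign set,
   so A != 0) and A == 0 gives 0 | 0 = 0; complementing that result gives the
   EQ form: ~0xff = 0 and ~0 = 0xff. */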
5836 if (tem == 0
5837 && (code == NE
5838 || BRANCH_COST (optimize_insn_for_speed_p (),
5839 false) > 1))
5841 if (rtx_equal_p (subtarget, op0))
5842 subtarget = 0;
5844 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5845 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5846 OPTAB_WIDEN);
5848 if (tem && code == EQ)
5849 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5853 if (tem && normalizep)
5854 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5855 GET_MODE_BITSIZE (mode) - 1,
5856 subtarget, normalizep == 1);
5858 if (tem)
5860 if (!target)
5862 else if (GET_MODE (tem) != target_mode)
5864 convert_move (target, tem, 0);
5865 tem = target;
5867 else if (!subtarget)
5869 emit_move_insn (target, tem);
5870 tem = target;
5873 else
5874 delete_insns_since (last);
5876 return tem;
5879 /* Like emit_store_flag, but always succeeds. */
5881 rtx
5882 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5883 machine_mode mode, int unsignedp, int normalizep)
5885 rtx tem;
5886 rtx_code_label *label;
5887 rtx trueval, falseval;
5889 /* First see if emit_store_flag can do the job. */
5890 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5891 if (tem != 0)
5892 return tem;
5894 if (!target)
5895 target = gen_reg_rtx (word_mode);
5897 /* If this failed, we have to do this with set/compare/jump/set code.
5898 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5899 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5900 if (code == NE
5901 && GET_MODE_CLASS (mode) == MODE_INT
5902 && REG_P (target)
5903 && op0 == target
5904 && op1 == const0_rtx)
5906 label = gen_label_rtx ();
5907 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5908 NULL_RTX, NULL, label,
5909 profile_probability::uninitialized ());
5910 emit_move_insn (target, trueval);
5911 emit_label (label);
5912 return target;
5915 if (!REG_P (target)
5916 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5917 target = gen_reg_rtx (GET_MODE (target));
5919 /* Jump in the right direction if the target cannot implement CODE
5920 but can jump on its reverse condition. */
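/* Illustrative note: if, say, ORDERED cannot be branched on directly but
   UNORDERED can, the code below branches on the reversed condition and swaps
   the roles of TRUEVAL and FALSEVAL. */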
5921 falseval = const0_rtx;
5922 if (! can_compare_p (code, mode, ccp_jump)
5923 && (! FLOAT_MODE_P (mode)
5924 || code == ORDERED || code == UNORDERED
5925 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5926 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5928 enum rtx_code rcode;
5929 if (FLOAT_MODE_P (mode))
5930 rcode = reverse_condition_maybe_unordered (code);
5931 else
5932 rcode = reverse_condition (code);
5934 /* Canonicalize to UNORDERED for the libcall. */
5935 if (can_compare_p (rcode, mode, ccp_jump)
5936 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5938 falseval = trueval;
5939 trueval = const0_rtx;
5940 code = rcode;
5944 emit_move_insn (target, trueval);
5945 label = gen_label_rtx ();
5946 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5947 label, profile_probability::uninitialized ());
5949 emit_move_insn (target, falseval);
5950 emit_label (label);
5952 return target;
5955 /* Perform possibly multi-word comparison and conditional jump to LABEL
5956 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5957 now a thin wrapper around do_compare_rtx_and_jump. */
5959 static void
5960 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5961 rtx_code_label *label)
5963 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5964 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
5965 NULL, label, profile_probability::uninitialized ());