gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2017 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "backend.h"
  26 #include "target.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "predict.h"
  30 #include "memmodel.h"
  31 #include "tm_p.h"
  32 #include "expmed.h"
  33 #include "optabs.h"
  34 #include "emit-rtl.h"
  35 #include "diagnostic-core.h"
  36 #include "fold-const.h"
  37 #include "stor-layout.h"
  38 #include "dojump.h"
  39 #include "explow.h"
  40 #include "expr.h"
  41 #include "langhooks.h"
  42
  43 struct target_expmed default_target_expmed;
  44 #if SWITCHABLE_TARGET
  45 struct target_expmed *this_target_expmed = &default_target_expmed;
  46 #endif
  47
  48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  49                                    unsigned HOST_WIDE_INT,
  50                                    unsigned HOST_WIDE_INT,
  51                                    unsigned HOST_WIDE_INT,
  52                                    rtx, bool);
  53 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
  54                                      unsigned HOST_WIDE_INT,
  55                                      rtx, bool);
  56 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  57                                    unsigned HOST_WIDE_INT,
  58                                    unsigned HOST_WIDE_INT,
  59                                    unsigned HOST_WIDE_INT,
  60                                    rtx, bool);
  61 static rtx extract_fixed_bit_field (machine_mode, rtx,
  62                                     unsigned HOST_WIDE_INT,
  63                                     unsigned HOST_WIDE_INT, rtx, int, bool);
  64 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
  65                                       unsigned HOST_WIDE_INT,
  66                                       unsigned HOST_WIDE_INT, rtx, int, bool);
  67 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
  68 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  69                                     unsigned HOST_WIDE_INT, int, bool);
  70 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
  71 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  72 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  73
  74 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  75    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  76    The mask is truncated if necessary to the width of mode MODE.  The
  77    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
  78
  79 static inline rtx
  80 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
  81 {
  82   return immed_wide_int_const
  83     (wi::shifted_mask (bitpos, bitsize, complement,
  84                        GET_MODE_PRECISION (mode)), mode);
  85 }
  86
  87 /* Test whether a value is zero of a power of two.  */
  88 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  89   (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
  90
  91 struct init_expmed_rtl
  92 {
  93   rtx reg;
  94   rtx plus;
  95   rtx neg;
  96   rtx mult;
  97   rtx sdiv;
  98   rtx udiv;
  99   rtx sdiv_32;
 100   rtx smod_32;
 101   rtx wide_mult;
 102   rtx wide_lshr;
 103   rtx wide_trunc;
 104   rtx shift;
 105   rtx shift_mult;
 106   rtx shift_add;
 107   rtx shift_sub0;
 108   rtx shift_sub1;
 109   rtx zext;
 110   rtx trunc;
 111
 112   rtx pow2[MAX_BITS_PER_WORD];
 113   rtx cint[MAX_BITS_PER_WORD];
 114 };
 115
 116 static void
 117 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
 118                       machine_mode from_mode, bool speed)
 119 {
 120   int to_size, from_size;
 121   rtx which;
 122
 123   to_size = GET_MODE_PRECISION (to_mode);
 124   from_size = GET_MODE_PRECISION (from_mode);
 125
 126   /* Most partial integers have a precision less than the "full"
 127      integer it requires for storage.  In case one doesn't, for
 128      comparison purposes here, reduce the bit size by one in that
 129      case.  */
 130   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 131       && pow2p_hwi (to_size))
 132     to_size --;
 133   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 134       && pow2p_hwi (from_size))
 135     from_size --;
 136
 137   /* Assume cost of zero-extend and sign-extend is the same.  */
 138   which = (to_size < from_size ? all->trunc : all->zext);
 139
 140   PUT_MODE (all->reg, from_mode);
 141   set_convert_cost (to_mode, from_mode, speed,
 142                     set_src_cost (which, to_mode, speed));
 143 }
 144
 145 static void
 146 init_expmed_one_mode (struct init_expmed_rtl *all,
 147                       machine_mode mode, int speed)
 148 {
 149   int m, n, mode_bitsize;
 150   machine_mode mode_from;
 151
 152   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 153
 154   PUT_MODE (all->reg, mode);
 155   PUT_MODE (all->plus, mode);
 156   PUT_MODE (all->neg, mode);
 157   PUT_MODE (all->mult, mode);
 158   PUT_MODE (all->sdiv, mode);
 159   PUT_MODE (all->udiv, mode);
 160   PUT_MODE (all->sdiv_32, mode);
 161   PUT_MODE (all->smod_32, mode);
 162   PUT_MODE (all->wide_trunc, mode);
 163   PUT_MODE (all->shift, mode);
 164   PUT_MODE (all->shift_mult, mode);
 165   PUT_MODE (all->shift_add, mode);
 166   PUT_MODE (all->shift_sub0, mode);
 167   PUT_MODE (all->shift_sub1, mode);
 168   PUT_MODE (all->zext, mode);
 169   PUT_MODE (all->trunc, mode);
 170
 171   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
 172   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
 173   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
 174   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
 175   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
 176
 177   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
 178                                      <= 2 * add_cost (speed, mode)));
 179   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
 180                                      <= 4 * add_cost (speed, mode)));
 181
 182   set_shift_cost (speed, mode, 0, 0);
 183   {
 184     int cost = add_cost (speed, mode);
 185     set_shiftadd_cost (speed, mode, 0, cost);
 186     set_shiftsub0_cost (speed, mode, 0, cost);
 187     set_shiftsub1_cost (speed, mode, 0, cost);
 188   }
 189
 190   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 191   for (m = 1; m < n; m++)
 192     {
 193       XEXP (all->shift, 1) = all->cint[m];
 194       XEXP (all->shift_mult, 1) = all->pow2[m];
 195
 196       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
 197       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
 198                                                        speed));
 199       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
 200                                                         speed));
 201       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
 202                                                         speed));
 203     }
 204
 205   if (SCALAR_INT_MODE_P (mode))
 206     {
 207       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 208            mode_from = (machine_mode)(mode_from + 1))
 209         init_expmed_one_conv (all, mode, mode_from, speed);
 210     }
 211   if (GET_MODE_CLASS (mode) == MODE_INT)
 212     {
 213       machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
 214       if (wider_mode != VOIDmode)
 215         {
 216           PUT_MODE (all->zext, wider_mode);
 217           PUT_MODE (all->wide_mult, wider_mode);
 218           PUT_MODE (all->wide_lshr, wider_mode);
 219           XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
 220
 221           set_mul_widen_cost (speed, wider_mode,
 222                               set_src_cost (all->wide_mult, wider_mode, speed));
 223           set_mul_highpart_cost (speed, mode,
 224                                  set_src_cost (all->wide_trunc, mode, speed));
 225         }
 226     }
 227 }
 228
 229 void
 230 init_expmed (void)
 231 {
 232   struct init_expmed_rtl all;
 233   machine_mode mode = QImode;
 234   int m, speed;
 235
 236   memset (&all, 0, sizeof all);
 237   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 238     {
 239       all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
 240       all.cint[m] = GEN_INT (m);
 241     }
 242
 243   /* Avoid using hard regs in ways which may be unsupported.  */
 244   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 245   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 246   all.neg = gen_rtx_NEG (mode, all.reg);
 247   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 248   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 249   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 250   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 251   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 252   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 253   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 254   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 255   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 256   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 257   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 258   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 259   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 260   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 261   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 262
 263   for (speed = 0; speed < 2; speed++)
 264     {
 265       crtl->maybe_hot_insn_p = speed;
 266       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
 267
 268       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 269            mode = (machine_mode)(mode + 1))
 270         init_expmed_one_mode (&all, mode, speed);
 271
 272       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 273         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 274              mode = (machine_mode)(mode + 1))
 275           init_expmed_one_mode (&all, mode, speed);
 276
 277       if (MIN_MODE_VECTOR_INT != VOIDmode)
 278         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 279              mode = (machine_mode)(mode + 1))
 280           init_expmed_one_mode (&all, mode, speed);
 281     }
 282
 283   if (alg_hash_used_p ())
 284     {
 285       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 286       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 287     }
 288   else
 289     set_alg_hash_used_p (true);
 290   default_rtl_profile ();
 291
 292   ggc_free (all.trunc);
 293   ggc_free (all.shift_sub1);
 294   ggc_free (all.shift_sub0);
 295   ggc_free (all.shift_add);
 296   ggc_free (all.shift_mult);
 297   ggc_free (all.shift);
 298   ggc_free (all.wide_trunc);
 299   ggc_free (all.wide_lshr);
 300   ggc_free (all.wide_mult);
 301   ggc_free (all.zext);
 302   ggc_free (all.smod_32);
 303   ggc_free (all.sdiv_32);
 304   ggc_free (all.udiv);
 305   ggc_free (all.sdiv);
 306   ggc_free (all.mult);
 307   ggc_free (all.neg);
 308   ggc_free (all.plus);
 309   ggc_free (all.reg);
 310 }
 311
 312 /* Return an rtx representing minus the value of X.
 313    MODE is the intended mode of the result,
 314    useful if X is a CONST_INT.  */
 315
 316 rtx
 317 negate_rtx (machine_mode mode, rtx x)
 318 {
 319   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 320
 321   if (result == 0)
 322     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 323
 324   return result;
 325 }
 326
 327 /* Whether reverse storage order is supported on the target.  */
 328 static int reverse_storage_order_supported = -1;
 329
 330 /* Check whether reverse storage order is supported on the target.  */
 331
 332 static void
 333 check_reverse_storage_order_support (void)
 334 {
 335   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 336     {
 337       reverse_storage_order_supported = 0;
 338       sorry ("reverse scalar storage order");
 339     }
 340   else
 341     reverse_storage_order_supported = 1;
 342 }
 343
 344 /* Whether reverse FP storage order is supported on the target.  */
 345 static int reverse_float_storage_order_supported = -1;
 346
 347 /* Check whether reverse FP storage order is supported on the target.  */
 348
 349 static void
 350 check_reverse_float_storage_order_support (void)
 351 {
 352   if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 353     {
 354       reverse_float_storage_order_supported = 0;
 355       sorry ("reverse floating-point scalar storage order");
 356     }
 357   else
 358     reverse_float_storage_order_supported = 1;
 359 }
 360
 361 /* Return an rtx representing value of X with reverse storage order.
 362    MODE is the intended mode of the result,
 363    useful if X is a CONST_INT.  */
 364
 365 rtx
 366 flip_storage_order (machine_mode mode, rtx x)
 367 {
 368   machine_mode int_mode;
 369   rtx result;
 370
 371   if (mode == QImode)
 372     return x;
 373
 374   if (COMPLEX_MODE_P (mode))
 375     {
 376       rtx real = read_complex_part (x, false);
 377       rtx imag = read_complex_part (x, true);
 378
 379       real = flip_storage_order (GET_MODE_INNER (mode), real);
 380       imag = flip_storage_order (GET_MODE_INNER (mode), imag);
 381
 382       return gen_rtx_CONCAT (mode, real, imag);
 383     }
 384
 385   if (__builtin_expect (reverse_storage_order_supported < 0, 0))
 386     check_reverse_storage_order_support ();
 387
 388   if (SCALAR_INT_MODE_P (mode))
 389     int_mode = mode;
 390   else
 391     {
 392       if (FLOAT_MODE_P (mode)
 393           && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
 394         check_reverse_float_storage_order_support ();
 395
 396       int_mode = mode_for_size (GET_MODE_PRECISION (mode), MODE_INT, 0);
 397       if (int_mode == BLKmode)
 398         {
 399           sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
 400           return x;
 401         }
 402       x = gen_lowpart (int_mode, x);
 403     }
 404
 405   result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
 406   if (result == 0)
 407     result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
 408
 409   if (int_mode != mode)
 410     result = gen_lowpart (mode, result);
 411
 412   return result;
 413 }
 414
 415 /* Adjust bitfield memory MEM so that it points to the first unit of mode
 416    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
 417    If MODE is BLKmode, return a reference to every byte in the bitfield.
 418    Set *NEW_BITNUM to the bit position of the field within the new memory.  */
 419
 420 static rtx
 421 narrow_bit_field_mem (rtx mem, machine_mode mode,
 422                       unsigned HOST_WIDE_INT bitsize,
 423                       unsigned HOST_WIDE_INT bitnum,
 424                       unsigned HOST_WIDE_INT *new_bitnum)
 425 {
 426   if (mode == BLKmode)
 427     {
 428       *new_bitnum = bitnum % BITS_PER_UNIT;
 429       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 430       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 431                             / BITS_PER_UNIT);
 432       return adjust_bitfield_address_size (mem, mode, offset, size);
 433     }
 434   else
 435     {
 436       unsigned int unit = GET_MODE_BITSIZE (mode);
 437       *new_bitnum = bitnum % unit;
 438       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 439       return adjust_bitfield_address (mem, mode, offset);
 440     }
 441 }
 442
 443 /* The caller wants to perform insertion or extraction PATTERN on a
 444    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 445    BITREGION_START and BITREGION_END are as for store_bit_field
 446    and FIELDMODE is the natural mode of the field.
 447
 448    Search for a mode that is compatible with the memory access
 449    restrictions and (where applicable) with a register insertion or
 450    extraction.  Return the new memory on success, storing the adjusted
 451    bit position in *NEW_BITNUM.  Return null otherwise.  */
 452
 453 static rtx
 454 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 455                               rtx op0, HOST_WIDE_INT bitsize,
 456                               HOST_WIDE_INT bitnum,
 457                               unsigned HOST_WIDE_INT bitregion_start,
 458                               unsigned HOST_WIDE_INT bitregion_end,
 459                               machine_mode fieldmode,
 460                               unsigned HOST_WIDE_INT *new_bitnum)
 461 {
 462   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 463                                 bitregion_end, MEM_ALIGN (op0),
 464                                 MEM_VOLATILE_P (op0));
 465   machine_mode best_mode;
 466   if (iter.next_mode (&best_mode))
 467     {
 468       /* We can use a memory in BEST_MODE.  See whether this is true for
 469          any wider modes.  All other things being equal, we prefer to
 470          use the widest mode possible because it tends to expose more
 471          CSE opportunities.  */
 472       if (!iter.prefer_smaller_modes ())
 473         {
 474           /* Limit the search to the mode required by the corresponding
 475              register insertion or extraction instruction, if any.  */
 476           machine_mode limit_mode = word_mode;
 477           extraction_insn insn;
 478           if (get_best_reg_extraction_insn (&insn, pattern,
 479                                             GET_MODE_BITSIZE (best_mode),
 480                                             fieldmode))
 481             limit_mode = insn.field_mode;
 482
 483           machine_mode wider_mode;
 484           while (iter.next_mode (&wider_mode)
 485                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 486             best_mode = wider_mode;
 487         }
 488       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 489                                    new_bitnum);
 490     }
 491   return NULL_RTX;
 492 }
 493
 494 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 495    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 496    offset is then BITNUM / BITS_PER_UNIT.  */
 497
 498 static bool
 499 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
 500                      unsigned HOST_WIDE_INT bitsize,
 501                      machine_mode struct_mode)
 502 {
 503   if (BYTES_BIG_ENDIAN)
 504     return (bitnum % BITS_PER_UNIT == 0
 505             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
 506                 || (bitnum + bitsize) % BITS_PER_WORD == 0));
 507   else
 508     return bitnum % BITS_PER_WORD == 0;
 509 }
 510
 511 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 512    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 513    Return false if the access would touch memory outside the range
 514    BITREGION_START to BITREGION_END for conformance to the C++ memory
 515    model.  */
 516
 517 static bool
 518 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 519                             unsigned HOST_WIDE_INT bitnum,
 520                             machine_mode fieldmode,
 521                             unsigned HOST_WIDE_INT bitregion_start,
 522                             unsigned HOST_WIDE_INT bitregion_end)
 523 {
 524   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 525
 526   /* -fstrict-volatile-bitfields must be enabled and we must have a
 527      volatile MEM.  */
 528   if (!MEM_P (op0)
 529       || !MEM_VOLATILE_P (op0)
 530       || flag_strict_volatile_bitfields <= 0)
 531     return false;
 532
 533   /* Non-integral modes likely only happen with packed structures.
 534      Punt.  */
 535   if (!SCALAR_INT_MODE_P (fieldmode))
 536     return false;
 537
 538   /* The bit size must not be larger than the field mode, and
 539      the field mode must not be larger than a word.  */
 540   if (bitsize > modesize || modesize > BITS_PER_WORD)
 541     return false;
 542
 543   /* Check for cases of unaligned fields that must be split.  */
 544   if (bitnum % modesize + bitsize > modesize)
 545     return false;
 546
 547   /* The memory must be sufficiently aligned for a MODESIZE access.
 548      This condition guarantees, that the memory access will not
 549      touch anything after the end of the structure.  */
 550   if (MEM_ALIGN (op0) < modesize)
 551     return false;
 552
 553   /* Check for cases where the C++ memory model applies.  */
 554   if (bitregion_end != 0
 555       && (bitnum - bitnum % modesize < bitregion_start
 556           || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
 557     return false;
 558
 559   return true;
 560 }
 561
 562 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 563    bit number BITNUM can be treated as a simple value of mode MODE.  */
 564
 565 static bool
 566 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 567                        unsigned HOST_WIDE_INT bitnum, machine_mode mode)
 568 {
 569   return (MEM_P (op0)
 570           && bitnum % BITS_PER_UNIT == 0
 571           && bitsize == GET_MODE_BITSIZE (mode)
 572           && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
 573               || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
 574                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 575 }
 576 \f
 577 /* Try to use instruction INSV to store VALUE into a field of OP0.
 578    BITSIZE and BITNUM are as for store_bit_field.  */
 579
 580 static bool
 581 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 582                             unsigned HOST_WIDE_INT bitsize,
 583                             unsigned HOST_WIDE_INT bitnum,
 584                             rtx value)
 585 {
 586   struct expand_operand ops[4];
 587   rtx value1;
 588   rtx xop0 = op0;
 589   rtx_insn *last = get_last_insn ();
 590   bool copy_back = false;
 591
 592   machine_mode op_mode = insv->field_mode;
 593   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 594   if (bitsize == 0 || bitsize > unit)
 595     return false;
 596
 597   if (MEM_P (xop0))
 598     /* Get a reference to the first byte of the field.  */
 599     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 600                                  &bitnum);
 601   else
 602     {
 603       /* Convert from counting within OP0 to counting in OP_MODE.  */
 604       if (BYTES_BIG_ENDIAN)
 605         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 606
 607       /* If xop0 is a register, we need it in OP_MODE
 608          to make it acceptable to the format of insv.  */
 609       if (GET_CODE (xop0) == SUBREG)
 610         /* We can't just change the mode, because this might clobber op0,
 611            and we will need the original value of op0 if insv fails.  */
 612         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 613       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 614         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 615     }
 616
 617   /* If the destination is a paradoxical subreg such that we need a
 618      truncate to the inner mode, perform the insertion on a temporary and
 619      truncate the result to the original destination.  Note that we can't
 620      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 621      X) 0)) is (reg:N X).  */
 622   if (GET_CODE (xop0) == SUBREG
 623       && REG_P (SUBREG_REG (xop0))
 624       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 625                                          op_mode))
 626     {
 627       rtx tem = gen_reg_rtx (op_mode);
 628       emit_move_insn (tem, xop0);
 629       xop0 = tem;
 630       copy_back = true;
 631     }
 632
 633   /* There are similar overflow check at the start of store_bit_field_1,
 634      but that only check the situation where the field lies completely
 635      outside the register, while there do have situation where the field
 636      lies partialy in the register, we need to adjust bitsize for this
 637      partial overflow situation.  Without this fix, pr48335-2.c on big-endian
 638      will broken on those arch support bit insert instruction, like arm, aarch64
 639      etc.  */
 640   if (bitsize + bitnum > unit && bitnum < unit)
 641     {
 642       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
 643                "destination object, data truncated into %wu-bit",
 644                bitsize, unit - bitnum);
 645       bitsize = unit - bitnum;
 646     }
 647
 648   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 649      "backwards" from the size of the unit we are inserting into.
 650      Otherwise, we count bits from the most significant on a
 651      BYTES/BITS_BIG_ENDIAN machine.  */
 652
 653   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 654     bitnum = unit - bitsize - bitnum;
 655
 656   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 657   value1 = value;
 658   if (GET_MODE (value) != op_mode)
 659     {
 660       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 661         {
 662           rtx tmp;
 663           /* Optimization: Don't bother really extending VALUE
 664              if it has all the bits we will actually use.  However,
 665              if we must narrow it, be sure we do it correctly.  */
 666
 667           if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 668             {
 669               tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 670               if (! tmp)
 671                 tmp = simplify_gen_subreg (op_mode,
 672                                            force_reg (GET_MODE (value),
 673                                                       value1),
 674                                            GET_MODE (value), 0);
 675             }
 676           else
 677             {
 678               tmp = gen_lowpart_if_possible (op_mode, value1);
 679               if (! tmp)
 680                 tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value),
 681                                                        value1));
 682             }
 683           value1 = tmp;
 684         }
 685       else if (CONST_INT_P (value))
 686         value1 = gen_int_mode (INTVAL (value), op_mode);
 687       else
 688         /* Parse phase is supposed to make VALUE's data type
 689            match that of the component reference, which is a type
 690            at least as wide as the field; so VALUE should have
 691            a mode that corresponds to that type.  */
 692         gcc_assert (CONSTANT_P (value));
 693     }
 694
 695   create_fixed_operand (&ops[0], xop0);
 696   create_integer_operand (&ops[1], bitsize);
 697   create_integer_operand (&ops[2], bitnum);
 698   create_input_operand (&ops[3], value1, op_mode);
 699   if (maybe_expand_insn (insv->icode, 4, ops))
 700     {
 701       if (copy_back)
 702         convert_move (op0, xop0, true);
 703       return true;
 704     }
 705   delete_insns_since (last);
 706   return false;
 707 }
 708
 709 /* A subroutine of store_bit_field, with the same arguments.  Return true
 710    if the operation could be implemented.
 711
 712    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 713    no other way of implementing the operation.  If FALLBACK_P is false,
 714    return false instead.  */
 715
 716 static bool
 717 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 718                    unsigned HOST_WIDE_INT bitnum,
 719                    unsigned HOST_WIDE_INT bitregion_start,
 720                    unsigned HOST_WIDE_INT bitregion_end,
 721                    machine_mode fieldmode,
 722                    rtx value, bool reverse, bool fallback_p)
 723 {
 724   rtx op0 = str_rtx;
 725   rtx orig_value;
 726
 727   while (GET_CODE (op0) == SUBREG)
 728     {
 729       /* The following line once was done only if WORDS_BIG_ENDIAN,
 730          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 731          meaningful at a much higher level; when structures are copied
 732          between memory and regs, the higher-numbered regs
 733          always get higher addresses.  */
 734       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 735       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 736       int byte_offset = 0;
 737
 738       /* Paradoxical subregs need special handling on big-endian machines.  */
 739       if (paradoxical_subreg_p (op0))
 740         {
 741           int difference = inner_mode_size - outer_mode_size;
 742
 743           if (WORDS_BIG_ENDIAN)
 744             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 745           if (BYTES_BIG_ENDIAN)
 746             byte_offset += difference % UNITS_PER_WORD;
 747         }
 748       else
 749         byte_offset = SUBREG_BYTE (op0);
 750
 751       bitnum += byte_offset * BITS_PER_UNIT;
 752       op0 = SUBREG_REG (op0);
 753     }
 754
 755   /* No action is needed if the target is a register and if the field
 756      lies completely outside that register.  This can occur if the source
 757      code contains an out-of-bounds access to a small array.  */
 758   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 759     return true;
 760
 761   /* Use vec_set patterns for inserting parts of vectors whenever
 762      available.  */
 763   if (VECTOR_MODE_P (GET_MODE (op0))
 764       && !MEM_P (op0)
 765       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 766       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 767       && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
 768       && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
 769     {
 770       struct expand_operand ops[3];
 771       machine_mode outermode = GET_MODE (op0);
 772       machine_mode innermode = GET_MODE_INNER (outermode);
 773       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 774       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 775
 776       create_fixed_operand (&ops[0], op0);
 777       create_input_operand (&ops[1], value, innermode);
 778       create_integer_operand (&ops[2], pos);
 779       if (maybe_expand_insn (icode, 3, ops))
 780         return true;
 781     }
 782
 783   /* If the target is a register, overwriting the entire object, or storing
 784      a full-word or multi-word field can be done with just a SUBREG.  */
 785   if (!MEM_P (op0)
 786       && bitsize == GET_MODE_BITSIZE (fieldmode)
 787       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 788           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 789     {
 790       /* Use the subreg machinery either to narrow OP0 to the required
 791          words or to cope with mode punning between equal-sized modes.
 792          In the latter case, use subreg on the rhs side, not lhs.  */
 793       rtx sub;
 794
 795       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 796         {
 797           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 798           if (sub)
 799             {
 800               if (reverse)
 801                 sub = flip_storage_order (GET_MODE (op0), sub);
 802               emit_move_insn (op0, sub);
 803               return true;
 804             }
 805         }
 806       else
 807         {
 808           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 809                                      bitnum / BITS_PER_UNIT);
 810           if (sub)
 811             {
 812               if (reverse)
 813                 value = flip_storage_order (fieldmode, value);
 814               emit_move_insn (sub, value);
 815               return true;
 816             }
 817         }
 818     }
 819
 820   /* If the target is memory, storing any naturally aligned field can be
 821      done with a simple store.  For targets that support fast unaligned
 822      memory, any naturally sized, unit aligned field can be done directly.  */
 823   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
 824     {
 825       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 826       if (reverse)
 827         value = flip_storage_order (fieldmode, value);
 828       emit_move_insn (op0, value);
 829       return true;
 830     }
 831
 832   /* Make sure we are playing with integral modes.  Pun with subregs
 833      if we aren't.  This must come after the entire register case above,
 834      since that case is valid for any mode.  The following cases are only
 835      valid for integral modes.  */
 836   {
 837     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 838     if (imode != GET_MODE (op0))
 839       {
 840         if (MEM_P (op0))
 841           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
 842         else
 843           {
 844             gcc_assert (imode != BLKmode);
 845             op0 = gen_lowpart (imode, op0);
 846           }
 847       }
 848   }
 849
 850   /* Storing an lsb-aligned field in a register
 851      can be done with a movstrict instruction.  */
 852
 853   if (!MEM_P (op0)
 854       && !reverse
 855       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 856       && bitsize == GET_MODE_BITSIZE (fieldmode)
 857       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 858     {
 859       struct expand_operand ops[2];
 860       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 861       rtx arg0 = op0;
 862       unsigned HOST_WIDE_INT subreg_off;
 863
 864       if (GET_CODE (arg0) == SUBREG)
 865         {
 866           /* Else we've got some float mode source being extracted into
 867              a different float mode destination -- this combination of
 868              subregs results in Severe Tire Damage.  */
 869           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 870                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 871                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 872           arg0 = SUBREG_REG (arg0);
 873         }
 874
 875       subreg_off = bitnum / BITS_PER_UNIT;
 876       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 877         {
 878           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 879
 880           create_fixed_operand (&ops[0], arg0);
 881           /* Shrink the source operand to FIELDMODE.  */
 882           create_convert_operand_to (&ops[1], value, fieldmode, false);
 883           if (maybe_expand_insn (icode, 2, ops))
 884             return true;
 885         }
 886     }
 887
 888   /* Handle fields bigger than a word.  */
 889
 890   if (bitsize > BITS_PER_WORD)
 891     {
 892       /* Here we transfer the words of the field
 893          in the order least significant first.
 894          This is because the most significant word is the one which may
 895          be less than full.
 896          However, only do that if the value is not BLKmode.  */
 897
 898       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 899       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 900       unsigned int i;
 901       rtx_insn *last;
 902
 903       /* This is the mode we must force value to, so that there will be enough
 904          subwords to extract.  Note that fieldmode will often (always?) be
 905          VOIDmode, because that is what store_field uses to indicate that this
 906          is a bit field, but passing VOIDmode to operand_subword_force
 907          is not allowed.  */
 908       fieldmode = GET_MODE (value);
 909       if (fieldmode == VOIDmode)
 910         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 911
 912       last = get_last_insn ();
 913       for (i = 0; i < nwords; i++)
 914         {
 915           /* If I is 0, use the low-order word in both field and target;
 916              if I is 1, use the next to lowest word; and so on.  */
 917           unsigned int wordnum = (backwards
 918                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 919                                   - i - 1
 920                                   : i);
 921           unsigned int bit_offset = (backwards ^ reverse
 922                                      ? MAX ((int) bitsize - ((int) i + 1)
 923                                             * BITS_PER_WORD,
 924                                             0)
 925                                      : (int) i * BITS_PER_WORD);
 926           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 927           unsigned HOST_WIDE_INT new_bitsize =
 928             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 929
 930           /* If the remaining chunk doesn't have full wordsize we have
 931              to make sure that for big-endian machines the higher order
 932              bits are used.  */
 933           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 934             value_word = simplify_expand_binop (word_mode, lshr_optab,
 935                                                 value_word,
 936                                                 GEN_INT (BITS_PER_WORD
 937                                                          - new_bitsize),
 938                                                 NULL_RTX, true,
 939                                                 OPTAB_LIB_WIDEN);
 940
 941           if (!store_bit_field_1 (op0, new_bitsize,
 942                                   bitnum + bit_offset,
 943                                   bitregion_start, bitregion_end,
 944                                   word_mode,
 945                                   value_word, reverse, fallback_p))
 946             {
 947               delete_insns_since (last);
 948               return false;
 949             }
 950         }
 951       return true;
 952     }
 953
 954   /* If VALUE has a floating-point or complex mode, access it as an
 955      integer of the corresponding size.  This can occur on a machine
 956      with 64 bit registers that uses SFmode for float.  It can also
 957      occur for unaligned float or complex fields.  */
 958   orig_value = value;
 959   if (GET_MODE (value) != VOIDmode
 960       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 961       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 962     {
 963       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 964       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 965     }
 966
 967   /* If OP0 is a multi-word register, narrow it to the affected word.
 968      If the region spans two words, defer to store_split_bit_field.
 969      Don't do this if op0 is a single hard register wider than word
 970      such as a float or vector register.  */
 971   if (!MEM_P (op0)
 972       && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD
 973       && (!REG_P (op0)
 974           || !HARD_REGISTER_P (op0)
 975           || HARD_REGNO_NREGS (REGNO (op0), GET_MODE (op0)) != 1))
 976     {
 977       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
 978         {
 979           if (!fallback_p)
 980             return false;
 981
 982           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 983                                  bitregion_end, value, reverse);
 984           return true;
 985         }
 986       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 987                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 988       gcc_assert (op0);
 989       bitnum %= BITS_PER_WORD;
 990     }
 991
 992   /* From here on we can assume that the field to be stored in fits
 993      within a word.  If the destination is a register, it too fits
 994      in a word.  */
 995
 996   extraction_insn insv;
 997   if (!MEM_P (op0)
 998       && !reverse
 999       && get_best_reg_extraction_insn (&insv, EP_insv,
1000                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1001                                        fieldmode)
1002       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1003     return true;
1004
1005   /* If OP0 is a memory, try copying it to a register and seeing if a
1006      cheap register alternative is available.  */
1007   if (MEM_P (op0) && !reverse)
1008     {
1009       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1010                                         fieldmode)
1011           && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1012         return true;
1013
1014       rtx_insn *last = get_last_insn ();
1015
1016       /* Try loading part of OP0 into a register, inserting the bitfield
1017          into that, and then copying the result back to OP0.  */
1018       unsigned HOST_WIDE_INT bitpos;
1019       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1020                                                bitregion_start, bitregion_end,
1021                                                fieldmode, &bitpos);
1022       if (xop0)
1023         {
1024           rtx tempreg = copy_to_reg (xop0);
1025           if (store_bit_field_1 (tempreg, bitsize, bitpos,
1026                                  bitregion_start, bitregion_end,
1027                                  fieldmode, orig_value, reverse, false))
1028             {
1029               emit_move_insn (xop0, tempreg);
1030               return true;
1031             }
1032           delete_insns_since (last);
1033         }
1034     }
1035
1036   if (!fallback_p)
1037     return false;
1038
1039   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
1040                          bitregion_end, value, reverse);
1041   return true;
1042 }
1043
1044 /* Generate code to store value from rtx VALUE
1045    into a bit-field within structure STR_RTX
1046    containing BITSIZE bits starting at bit BITNUM.
1047
1048    BITREGION_START is bitpos of the first bitfield in this region.
1049    BITREGION_END is the bitpos of the ending bitfield in this region.
1050    These two fields are 0, if the C++ memory model does not apply,
1051    or we are not interested in keeping track of bitfield regions.
1052
1053    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1054
1055    If REVERSE is true, the store is to be done in reverse order.  */
1056
1057 void
1058 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1059                  unsigned HOST_WIDE_INT bitnum,
1060                  unsigned HOST_WIDE_INT bitregion_start,
1061                  unsigned HOST_WIDE_INT bitregion_end,
1062                  machine_mode fieldmode,
1063                  rtx value, bool reverse)
1064 {
1065   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1066   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
1067                                   bitregion_start, bitregion_end))
1068     {
1069       /* Storing of a full word can be done with a simple store.
1070          We know here that the field can be accessed with one single
1071          instruction.  For targets that support unaligned memory,
1072          an unaligned access may be necessary.  */
1073       if (bitsize == GET_MODE_BITSIZE (fieldmode))
1074         {
1075           str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
1076                                              bitnum / BITS_PER_UNIT);
1077           if (reverse)
1078             value = flip_storage_order (fieldmode, value);
1079           gcc_assert (bitnum % BITS_PER_UNIT == 0);
1080           emit_move_insn (str_rtx, value);
1081         }
1082       else
1083         {
1084           rtx temp;
1085
1086           str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
1087                                           &bitnum);
1088           gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
1089           temp = copy_to_reg (str_rtx);
1090           if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1091                                   fieldmode, value, reverse, true))
1092             gcc_unreachable ();
1093
1094           emit_move_insn (str_rtx, temp);
1095         }
1096
1097       return;
1098     }
1099
1100   /* Under the C++0x memory model, we must not touch bits outside the
1101      bit region.  Adjust the address to start at the beginning of the
1102      bit region.  */
1103   if (MEM_P (str_rtx) && bitregion_start > 0)
1104     {
1105       machine_mode bestmode;
1106       HOST_WIDE_INT offset, size;
1107
1108       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1109
1110       offset = bitregion_start / BITS_PER_UNIT;
1111       bitnum -= bitregion_start;
1112       size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1113       bitregion_end -= bitregion_start;
1114       bitregion_start = 0;
1115       bestmode = get_best_mode (bitsize, bitnum,
1116                                 bitregion_start, bitregion_end,
1117                                 MEM_ALIGN (str_rtx), VOIDmode,
1118                                 MEM_VOLATILE_P (str_rtx));
1119       str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1120     }
1121
1122   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1123                           bitregion_start, bitregion_end,
1124                           fieldmode, value, reverse, true))
1125     gcc_unreachable ();
1126 }
1127 \f
1128 /* Use shifts and boolean operations to store VALUE into a bit field of
1129    width BITSIZE in OP0, starting at bit BITNUM.
1130
1131    If REVERSE is true, the store is to be done in reverse order.  */
1132
1133 static void
1134 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1135                        unsigned HOST_WIDE_INT bitnum,
1136                        unsigned HOST_WIDE_INT bitregion_start,
1137                        unsigned HOST_WIDE_INT bitregion_end,
1138                        rtx value, bool reverse)
1139 {
1140   /* There is a case not handled here:
1141      a structure with a known alignment of just a halfword
1142      and a field split across two aligned halfwords within the structure.
1143      Or likewise a structure with a known alignment of just a byte
1144      and a field split across two bytes.
1145      Such cases are not supposed to be able to occur.  */
1146
1147   if (MEM_P (op0))
1148     {
1149       machine_mode mode = GET_MODE (op0);
1150       if (GET_MODE_BITSIZE (mode) == 0
1151           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1152         mode = word_mode;
1153       mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1154                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1155
1156       if (mode == VOIDmode)
1157         {
1158           /* The only way this should occur is if the field spans word
1159              boundaries.  */
1160           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1161                                  bitregion_end, value, reverse);
1162           return;
1163         }
1164
1165       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1166     }
1167
1168   store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
1169 }
1170
1171 /* Helper function for store_fixed_bit_field, stores
1172    the bit field always using the MODE of OP0.  */
1173
1174 static void
1175 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1176                          unsigned HOST_WIDE_INT bitnum,
1177                          rtx value, bool reverse)
1178 {
1179   machine_mode mode;
1180   rtx temp;
1181   int all_zero = 0;
1182   int all_one = 0;
1183
1184   mode = GET_MODE (op0);
1185   gcc_assert (SCALAR_INT_MODE_P (mode));
1186
1187   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1188      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1189
1190   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1191     /* BITNUM is the distance between our msb
1192        and that of the containing datum.
1193        Convert it to the distance from the lsb.  */
1194     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1195
1196   /* Now BITNUM is always the distance between our lsb
1197      and that of OP0.  */
1198
1199   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1200      we must first convert its mode to MODE.  */
1201
1202   if (CONST_INT_P (value))
1203     {
1204       unsigned HOST_WIDE_INT v = UINTVAL (value);
1205
1206       if (bitsize < HOST_BITS_PER_WIDE_INT)
1207         v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1208
1209       if (v == 0)
1210         all_zero = 1;
1211       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1212                 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1213                || (bitsize == HOST_BITS_PER_WIDE_INT
1214                    && v == HOST_WIDE_INT_M1U))
1215         all_one = 1;
1216
1217       value = lshift_value (mode, v, bitnum);
1218     }
1219   else
1220     {
1221       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1222                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1223
1224       if (GET_MODE (value) != mode)
1225         value = convert_to_mode (mode, value, 1);
1226
1227       if (must_and)
1228         value = expand_binop (mode, and_optab, value,
1229                               mask_rtx (mode, 0, bitsize, 0),
1230                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1231       if (bitnum > 0)
1232         value = expand_shift (LSHIFT_EXPR, mode, value,
1233                               bitnum, NULL_RTX, 1);
1234     }
1235
1236   if (reverse)
1237     value = flip_storage_order (mode, value);
1238
1239   /* Now clear the chosen bits in OP0,
1240      except that if VALUE is -1 we need not bother.  */
1241   /* We keep the intermediates in registers to allow CSE to combine
1242      consecutive bitfield assignments.  */
1243
1244   temp = force_reg (mode, op0);
1245
1246   if (! all_one)
1247     {
1248       rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1249       if (reverse)
1250         mask = flip_storage_order (mode, mask);
1251       temp = expand_binop (mode, and_optab, temp, mask,
1252                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1253       temp = force_reg (mode, temp);
1254     }
1255
1256   /* Now logical-or VALUE into OP0, unless it is zero.  */
1257
1258   if (! all_zero)
1259     {
1260       temp = expand_binop (mode, ior_optab, temp, value,
1261                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1262       temp = force_reg (mode, temp);
1263     }
1264
1265   if (op0 != temp)
1266     {
1267       op0 = copy_rtx (op0);
1268       emit_move_insn (op0, temp);
1269     }
1270 }
1271 \f
1272 /* Store a bit field that is split across multiple accessible memory objects.
1273
1274    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1275    BITSIZE is the field width; BITPOS the position of its first bit
1276    (within the word).
1277    VALUE is the value to store.
1278
1279    If REVERSE is true, the store is to be done in reverse order.
1280
1281    This does not yet handle fields wider than BITS_PER_WORD.  */
1282
1283 static void
1284 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1285                        unsigned HOST_WIDE_INT bitpos,
1286                        unsigned HOST_WIDE_INT bitregion_start,
1287                        unsigned HOST_WIDE_INT bitregion_end,
1288                        rtx value, bool reverse)
1289 {
1290   unsigned int unit, total_bits, bitsdone = 0;
1291
1292   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1293      much at a time.  */
1294   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1295     unit = BITS_PER_WORD;
1296   else
1297     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1298
1299   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1300      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1301      again, and we will mutually recurse forever.  */
1302   if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1303     unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1304
1305   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1306      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1307      that VALUE might be a floating-point constant.  */
1308   if (CONSTANT_P (value) && !CONST_INT_P (value))
1309     {
1310       rtx word = gen_lowpart_common (word_mode, value);
1311
1312       if (word && (value != word))
1313         value = word;
1314       else
1315         value = gen_lowpart_common (word_mode,
1316                                     force_reg (GET_MODE (value) != VOIDmode
1317                                                ? GET_MODE (value)
1318                                                : word_mode, value));
1319     }
1320
1321   total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1322
1323   while (bitsdone < bitsize)
1324     {
1325       unsigned HOST_WIDE_INT thissize;
1326       unsigned HOST_WIDE_INT thispos;
1327       unsigned HOST_WIDE_INT offset;
1328       rtx part, word;
1329
1330       offset = (bitpos + bitsdone) / unit;
1331       thispos = (bitpos + bitsdone) % unit;
1332
1333       /* When region of bytes we can touch is restricted, decrease
1334          UNIT close to the end of the region as needed.  If op0 is a REG
1335          or SUBREG of REG, don't do this, as there can't be data races
1336          on a register and we can expand shorter code in some cases.  */
1337       if (bitregion_end
1338           && unit > BITS_PER_UNIT
1339           && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1340           && !REG_P (op0)
1341           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1342         {
1343           unit = unit / 2;
1344           continue;
1345         }
1346
1347       /* THISSIZE must not overrun a word boundary.  Otherwise,
1348          store_fixed_bit_field will call us again, and we will mutually
1349          recurse forever.  */
1350       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1351       thissize = MIN (thissize, unit - thispos);
1352
1353       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1354         {
1355           /* Fetch successively less significant portions.  */
1356           if (CONST_INT_P (value))
1357             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1358                              >> (bitsize - bitsdone - thissize))
1359                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1360           /* Likewise, but the source is little-endian.  */
1361           else if (reverse)
1362             part = extract_fixed_bit_field (word_mode, value, thissize,
1363                                             bitsize - bitsdone - thissize,
1364                                             NULL_RTX, 1, false);
1365           else
1366             {
1367               int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1368               /* The args are chosen so that the last part includes the
1369                  lsb.  Give extract_bit_field the value it needs (with
1370                  endianness compensation) to fetch the piece we want.  */
1371               part = extract_fixed_bit_field (word_mode, value, thissize,
1372                                               total_bits - bitsize + bitsdone,
1373                                               NULL_RTX, 1, false);
1374             }
1375         }
1376       else
1377         {
1378           /* Fetch successively more significant portions.  */
1379           if (CONST_INT_P (value))
1380             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1381                              >> bitsdone)
1382                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1383           /* Likewise, but the source is big-endian.  */
1384           else if (reverse)
1385             part = extract_fixed_bit_field (word_mode, value, thissize,
1386                                             total_bits - bitsdone - thissize,
1387                                             NULL_RTX, 1, false);
1388           else
1389             part = extract_fixed_bit_field (word_mode, value, thissize,
1390                                             bitsdone, NULL_RTX, 1, false);
1391         }
1392
1393       /* If OP0 is a register, then handle OFFSET here.  */
1394       if (SUBREG_P (op0) || REG_P (op0))
1395         {
1396           machine_mode op0_mode = GET_MODE (op0);
1397           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1398             word = offset ? const0_rtx : op0;
1399           else
1400             word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1401                                           GET_MODE (op0));
1402           offset &= BITS_PER_WORD / unit - 1;
1403         }
1404       else
1405         word = op0;
1406
1407       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1408          it is just an out-of-bounds access.  Ignore it.  */
1409       if (word != const0_rtx)
1410         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1411                                bitregion_start, bitregion_end, part,
1412                                reverse);
1413       bitsdone += thissize;
1414     }
1415 }
1416 \f
1417 /* A subroutine of extract_bit_field_1 that converts return value X
1418    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1419    to extract_bit_field.  */
1420
1421 static rtx
1422 convert_extracted_bit_field (rtx x, machine_mode mode,
1423                              machine_mode tmode, bool unsignedp)
1424 {
1425   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1426     return x;
1427
1428   /* If the x mode is not a scalar integral, first convert to the
1429      integer mode of that size and then access it as a floating-point
1430      value via a SUBREG.  */
1431   if (!SCALAR_INT_MODE_P (tmode))
1432     {
1433       machine_mode smode;
1434
1435       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1436       x = convert_to_mode (smode, x, unsignedp);
1437       x = force_reg (smode, x);
1438       return gen_lowpart (tmode, x);
1439     }
1440
1441   return convert_to_mode (tmode, x, unsignedp);
1442 }
1443
1444 /* Try to use an ext(z)v pattern to extract a field from OP0.
1445    Return the extracted value on success, otherwise return null.
1446    EXT_MODE is the mode of the extraction and the other arguments
1447    are as for extract_bit_field.  */
1448
1449 static rtx
1450 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1451                               unsigned HOST_WIDE_INT bitsize,
1452                               unsigned HOST_WIDE_INT bitnum,
1453                               int unsignedp, rtx target,
1454                               machine_mode mode, machine_mode tmode)
1455 {
1456   struct expand_operand ops[4];
1457   rtx spec_target = target;
1458   rtx spec_target_subreg = 0;
1459   machine_mode ext_mode = extv->field_mode;
1460   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1461
1462   if (bitsize == 0 || unit < bitsize)
1463     return NULL_RTX;
1464
1465   if (MEM_P (op0))
1466     /* Get a reference to the first byte of the field.  */
1467     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1468                                 &bitnum);
1469   else
1470     {
1471       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1472       if (BYTES_BIG_ENDIAN)
1473         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1474
1475       /* If op0 is a register, we need it in EXT_MODE to make it
1476          acceptable to the format of ext(z)v.  */
1477       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1478         return NULL_RTX;
1479       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1480         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1481     }
1482
1483   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1484      "backwards" from the size of the unit we are extracting from.
1485      Otherwise, we count bits from the most significant on a
1486      BYTES/BITS_BIG_ENDIAN machine.  */
1487
1488   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1489     bitnum = unit - bitsize - bitnum;
1490
1491   if (target == 0)
1492     target = spec_target = gen_reg_rtx (tmode);
1493
1494   if (GET_MODE (target) != ext_mode)
1495     {
1496       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1497          between the mode of the extraction (word_mode) and the target
1498          mode.  Instead, create a temporary and use convert_move to set
1499          the target.  */
1500       if (REG_P (target)
1501           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1502         {
1503           target = gen_lowpart (ext_mode, target);
1504           if (GET_MODE_PRECISION (ext_mode)
1505               > GET_MODE_PRECISION (GET_MODE (spec_target)))
1506             spec_target_subreg = target;
1507         }
1508       else
1509         target = gen_reg_rtx (ext_mode);
1510     }
1511
1512   create_output_operand (&ops[0], target, ext_mode);
1513   create_fixed_operand (&ops[1], op0);
1514   create_integer_operand (&ops[2], bitsize);
1515   create_integer_operand (&ops[3], bitnum);
1516   if (maybe_expand_insn (extv->icode, 4, ops))
1517     {
1518       target = ops[0].value;
1519       if (target == spec_target)
1520         return target;
1521       if (target == spec_target_subreg)
1522         return spec_target;
1523       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1524     }
1525   return NULL_RTX;
1526 }
1527
1528 /* A subroutine of extract_bit_field, with the same arguments.
1529    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1530    if we can find no other means of implementing the operation.
1531    if FALLBACK_P is false, return NULL instead.  */
1532
1533 static rtx
1534 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1535                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1536                      machine_mode mode, machine_mode tmode,
1537                      bool reverse, bool fallback_p, rtx *alt_rtl)
1538 {
1539   rtx op0 = str_rtx;
1540   machine_mode int_mode;
1541   machine_mode mode1;
1542
1543   if (tmode == VOIDmode)
1544     tmode = mode;
1545
1546   while (GET_CODE (op0) == SUBREG)
1547     {
1548       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1549       op0 = SUBREG_REG (op0);
1550     }
1551
1552   /* If we have an out-of-bounds access to a register, just return an
1553      uninitialized register of the required mode.  This can occur if the
1554      source code contains an out-of-bounds access to a small array.  */
1555   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1556     return gen_reg_rtx (tmode);
1557
1558   if (REG_P (op0)
1559       && mode == GET_MODE (op0)
1560       && bitnum == 0
1561       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1562     {
1563       if (reverse)
1564         op0 = flip_storage_order (mode, op0);
1565       /* We're trying to extract a full register from itself.  */
1566       return op0;
1567     }
1568
1569   /* First try to check for vector from vector extractions.  */
1570   if (VECTOR_MODE_P (GET_MODE (op0))
1571       && !MEM_P (op0)
1572       && VECTOR_MODE_P (tmode)
1573       && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (tmode))
1574     {
1575       machine_mode new_mode = GET_MODE (op0);
1576       if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1577         {
1578           new_mode = mode_for_vector (GET_MODE_INNER (tmode),
1579                                       GET_MODE_BITSIZE (GET_MODE (op0))
1580                                       / GET_MODE_UNIT_BITSIZE (tmode));
1581           if (!VECTOR_MODE_P (new_mode)
1582               || GET_MODE_SIZE (new_mode) != GET_MODE_SIZE (GET_MODE (op0))
1583               || GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)
1584               || !targetm.vector_mode_supported_p (new_mode))
1585             new_mode = VOIDmode;
1586         }
1587       if (new_mode != VOIDmode
1588           && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1589               != CODE_FOR_nothing)
1590           && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (tmode)
1591               == bitnum / GET_MODE_BITSIZE (tmode)))
1592         {
1593           struct expand_operand ops[3];
1594           machine_mode outermode = new_mode;
1595           machine_mode innermode = tmode;
1596           enum insn_code icode
1597             = convert_optab_handler (vec_extract_optab, outermode, innermode);
1598           unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1599
1600           if (new_mode != GET_MODE (op0))
1601             op0 = gen_lowpart (new_mode, op0);
1602           create_output_operand (&ops[0], target, innermode);
1603           ops[0].target = 1;
1604           create_input_operand (&ops[1], op0, outermode);
1605           create_integer_operand (&ops[2], pos);
1606           if (maybe_expand_insn (icode, 3, ops))
1607             {
1608               if (alt_rtl && ops[0].target)
1609                 *alt_rtl = target;
1610               target = ops[0].value;
1611               if (GET_MODE (target) != mode)
1612                 return gen_lowpart (tmode, target);
1613               return target;
1614             }
1615         }
1616     }
1617
1618   /* See if we can get a better vector mode before extracting.  */
1619   if (VECTOR_MODE_P (GET_MODE (op0))
1620       && !MEM_P (op0)
1621       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1622     {
1623       machine_mode new_mode;
1624
1625       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1626         new_mode = MIN_MODE_VECTOR_FLOAT;
1627       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1628         new_mode = MIN_MODE_VECTOR_FRACT;
1629       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1630         new_mode = MIN_MODE_VECTOR_UFRACT;
1631       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1632         new_mode = MIN_MODE_VECTOR_ACCUM;
1633       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1634         new_mode = MIN_MODE_VECTOR_UACCUM;
1635       else
1636         new_mode = MIN_MODE_VECTOR_INT;
1637
1638       FOR_EACH_MODE_FROM (new_mode, new_mode)
1639         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1640             && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode)
1641             && targetm.vector_mode_supported_p (new_mode))
1642           break;
1643       if (new_mode != VOIDmode)
1644         op0 = gen_lowpart (new_mode, op0);
1645     }
1646
1647   /* Use vec_extract patterns for extracting parts of vectors whenever
1648      available.  */
1649   if (VECTOR_MODE_P (GET_MODE (op0))
1650       && !MEM_P (op0)
1651       && (convert_optab_handler (vec_extract_optab, GET_MODE (op0),
1652                                  GET_MODE_INNER (GET_MODE (op0)))
1653           != CODE_FOR_nothing)
1654       && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1655           == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1656     {
1657       struct expand_operand ops[3];
1658       machine_mode outermode = GET_MODE (op0);
1659       machine_mode innermode = GET_MODE_INNER (outermode);
1660       enum insn_code icode
1661         = convert_optab_handler (vec_extract_optab, outermode, innermode);
1662       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1663
1664       create_output_operand (&ops[0], target, innermode);
1665       ops[0].target = 1;
1666       create_input_operand (&ops[1], op0, outermode);
1667       create_integer_operand (&ops[2], pos);
1668       if (maybe_expand_insn (icode, 3, ops))
1669         {
1670           if (alt_rtl && ops[0].target)
1671             *alt_rtl = target;
1672           target = ops[0].value;
1673           if (GET_MODE (target) != mode)
1674             return gen_lowpart (tmode, target);
1675           return target;
1676         }
1677     }
1678
1679   /* Make sure we are playing with integral modes.  Pun with subregs
1680      if we aren't.  */
1681   {
1682     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1683     if (imode != GET_MODE (op0))
1684       {
1685         if (MEM_P (op0))
1686           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1687         else if (imode != BLKmode)
1688           {
1689             op0 = gen_lowpart (imode, op0);
1690
1691             /* If we got a SUBREG, force it into a register since we
1692                aren't going to be able to do another SUBREG on it.  */
1693             if (GET_CODE (op0) == SUBREG)
1694               op0 = force_reg (imode, op0);
1695           }
1696         else
1697           {
1698             HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1699             rtx mem = assign_stack_temp (GET_MODE (op0), size);
1700             emit_move_insn (mem, op0);
1701             op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1702           }
1703       }
1704   }
1705
1706   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1707      If that's wrong, the solution is to test for it and set TARGET to 0
1708      if needed.  */
1709
1710   /* Get the mode of the field to use for atomic access or subreg
1711      conversion.  */
1712   mode1 = mode;
1713   if (SCALAR_INT_MODE_P (tmode))
1714     {
1715       machine_mode try_mode = mode_for_size (bitsize,
1716                                                   GET_MODE_CLASS (tmode), 0);
1717       if (try_mode != BLKmode)
1718         mode1 = try_mode;
1719     }
1720   gcc_assert (mode1 != BLKmode);
1721
1722   /* Extraction of a full MODE1 value can be done with a subreg as long
1723      as the least significant bit of the value is the least significant
1724      bit of either OP0 or a word of OP0.  */
1725   if (!MEM_P (op0)
1726       && !reverse
1727       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1728       && bitsize == GET_MODE_BITSIZE (mode1)
1729       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1730     {
1731       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1732                                      bitnum / BITS_PER_UNIT);
1733       if (sub)
1734         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1735     }
1736
1737   /* Extraction of a full MODE1 value can be done with a load as long as
1738      the field is on a byte boundary and is sufficiently aligned.  */
1739   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1740     {
1741       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1742       if (reverse)
1743         op0 = flip_storage_order (mode1, op0);
1744       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1745     }
1746
1747   /* Handle fields bigger than a word.  */
1748
1749   if (bitsize > BITS_PER_WORD)
1750     {
1751       /* Here we transfer the words of the field
1752          in the order least significant first.
1753          This is because the most significant word is the one which may
1754          be less than full.  */
1755
1756       const bool backwards = WORDS_BIG_ENDIAN;
1757       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1758       unsigned int i;
1759       rtx_insn *last;
1760
1761       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1762         target = gen_reg_rtx (mode);
1763
1764       /* In case we're about to clobber a base register or something
1765          (see gcc.c-torture/execute/20040625-1.c).   */
1766       if (reg_mentioned_p (target, str_rtx))
1767         target = gen_reg_rtx (mode);
1768
1769       /* Indicate for flow that the entire target reg is being set.  */
1770       emit_clobber (target);
1771
1772       last = get_last_insn ();
1773       for (i = 0; i < nwords; i++)
1774         {
1775           /* If I is 0, use the low-order word in both field and target;
1776              if I is 1, use the next to lowest word; and so on.  */
1777           /* Word number in TARGET to use.  */
1778           unsigned int wordnum
1779             = (backwards
1780                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1781                : i);
1782           /* Offset from start of field in OP0.  */
1783           unsigned int bit_offset = (backwards ^ reverse
1784                                      ? MAX ((int) bitsize - ((int) i + 1)
1785                                             * BITS_PER_WORD,
1786                                             0)
1787                                      : (int) i * BITS_PER_WORD);
1788           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1789           rtx result_part
1790             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1791                                              bitsize - i * BITS_PER_WORD),
1792                                    bitnum + bit_offset, 1, target_part,
1793                                    mode, word_mode, reverse, fallback_p, NULL);
1794
1795           gcc_assert (target_part);
1796           if (!result_part)
1797             {
1798               delete_insns_since (last);
1799               return NULL;
1800             }
1801
1802           if (result_part != target_part)
1803             emit_move_insn (target_part, result_part);
1804         }
1805
1806       if (unsignedp)
1807         {
1808           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1809              need to be zero'd out.  */
1810           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1811             {
1812               unsigned int i, total_words;
1813
1814               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1815               for (i = nwords; i < total_words; i++)
1816                 emit_move_insn
1817                   (operand_subword (target,
1818                                     backwards ? total_words - i - 1 : i,
1819                                     1, VOIDmode),
1820                    const0_rtx);
1821             }
1822           return target;
1823         }
1824
1825       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1826       target = expand_shift (LSHIFT_EXPR, mode, target,
1827                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1828       return expand_shift (RSHIFT_EXPR, mode, target,
1829                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1830     }
1831
1832   /* If OP0 is a multi-word register, narrow it to the affected word.
1833      If the region spans two words, defer to extract_split_bit_field.  */
1834   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1835     {
1836       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1837         {
1838           if (!fallback_p)
1839             return NULL_RTX;
1840           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1841                                             reverse);
1842           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1843         }
1844       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1845                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1846       bitnum %= BITS_PER_WORD;
1847     }
1848
1849   /* From here on we know the desired field is smaller than a word.
1850      If OP0 is a register, it too fits within a word.  */
1851   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1852   extraction_insn extv;
1853   if (!MEM_P (op0)
1854       && !reverse
1855       /* ??? We could limit the structure size to the part of OP0 that
1856          contains the field, with appropriate checks for endianness
1857          and TRULY_NOOP_TRUNCATION.  */
1858       && get_best_reg_extraction_insn (&extv, pattern,
1859                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1860                                        tmode))
1861     {
1862       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1863                                                  unsignedp, target, mode,
1864                                                  tmode);
1865       if (result)
1866         return result;
1867     }
1868
1869   /* If OP0 is a memory, try copying it to a register and seeing if a
1870      cheap register alternative is available.  */
1871   if (MEM_P (op0) & !reverse)
1872     {
1873       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1874                                         tmode))
1875         {
1876           rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1877                                                      bitnum, unsignedp,
1878                                                      target, mode,
1879                                                      tmode);
1880           if (result)
1881             return result;
1882         }
1883
1884       rtx_insn *last = get_last_insn ();
1885
1886       /* Try loading part of OP0 into a register and extracting the
1887          bitfield from that.  */
1888       unsigned HOST_WIDE_INT bitpos;
1889       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1890                                                0, 0, tmode, &bitpos);
1891       if (xop0)
1892         {
1893           xop0 = copy_to_reg (xop0);
1894           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1895                                             unsignedp, target,
1896                                             mode, tmode, reverse, false, NULL);
1897           if (result)
1898             return result;
1899           delete_insns_since (last);
1900         }
1901     }
1902
1903   if (!fallback_p)
1904     return NULL;
1905
1906   /* Find a correspondingly-sized integer field, so we can apply
1907      shifts and masks to it.  */
1908   int_mode = int_mode_for_mode (tmode);
1909   if (int_mode == BLKmode)
1910     int_mode = int_mode_for_mode (mode);
1911   /* Should probably push op0 out to memory and then do a load.  */
1912   gcc_assert (int_mode != BLKmode);
1913
1914   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target,
1915                                     unsignedp, reverse);
1916
1917   /* Complex values must be reversed piecewise, so we need to undo the global
1918      reversal, convert to the complex mode and reverse again.  */
1919   if (reverse && COMPLEX_MODE_P (tmode))
1920     {
1921       target = flip_storage_order (int_mode, target);
1922       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1923       target = flip_storage_order (tmode, target);
1924     }
1925   else
1926     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1927
1928   return target;
1929 }
1930
1931 /* Generate code to extract a byte-field from STR_RTX
1932    containing BITSIZE bits, starting at BITNUM,
1933    and put it in TARGET if possible (if TARGET is nonzero).
1934    Regardless of TARGET, we return the rtx for where the value is placed.
1935
1936    STR_RTX is the structure containing the byte (a REG or MEM).
1937    UNSIGNEDP is nonzero if this is an unsigned bit field.
1938    MODE is the natural mode of the field value once extracted.
1939    TMODE is the mode the caller would like the value to have;
1940    but the value may be returned with type MODE instead.
1941
1942    If REVERSE is true, the extraction is to be done in reverse order.
1943
1944    If a TARGET is specified and we can store in it at no extra cost,
1945    we do so, and return TARGET.
1946    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1947    if they are equally easy.  */
1948
1949 rtx
1950 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1951                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1952                    machine_mode mode, machine_mode tmode, bool reverse,
1953                    rtx *alt_rtl)
1954 {
1955   machine_mode mode1;
1956
1957   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1958   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1959     mode1 = GET_MODE (str_rtx);
1960   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1961     mode1 = GET_MODE (target);
1962   else
1963     mode1 = tmode;
1964
1965   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1966     {
1967       /* Extraction of a full MODE1 value can be done with a simple load.
1968          We know here that the field can be accessed with one single
1969          instruction.  For targets that support unaligned memory,
1970          an unaligned access may be necessary.  */
1971       if (bitsize == GET_MODE_BITSIZE (mode1))
1972         {
1973           rtx result = adjust_bitfield_address (str_rtx, mode1,
1974                                                 bitnum / BITS_PER_UNIT);
1975           if (reverse)
1976             result = flip_storage_order (mode1, result);
1977           gcc_assert (bitnum % BITS_PER_UNIT == 0);
1978           return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1979         }
1980
1981       str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1982                                       &bitnum);
1983       gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1984       str_rtx = copy_to_reg (str_rtx);
1985     }
1986
1987   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1988                               target, mode, tmode, reverse, true, alt_rtl);
1989 }
1990 \f
1991 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1992    from bit BITNUM of OP0.
1993
1994    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1995    If REVERSE is true, the extraction is to be done in reverse order.
1996
1997    If TARGET is nonzero, attempts to store the value there
1998    and return TARGET, but this is not guaranteed.
1999    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
2000
2001 static rtx
2002 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2003                          unsigned HOST_WIDE_INT bitsize,
2004                          unsigned HOST_WIDE_INT bitnum, rtx target,
2005                          int unsignedp, bool reverse)
2006 {
2007   if (MEM_P (op0))
2008     {
2009       machine_mode mode
2010         = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
2011                          MEM_VOLATILE_P (op0));
2012
2013       if (mode == VOIDmode)
2014         /* The only way this should occur is if the field spans word
2015            boundaries.  */
2016         return extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
2017                                         reverse);
2018
2019       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2020     }
2021
2022   return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
2023                                     target, unsignedp, reverse);
2024 }
2025
2026 /* Helper function for extract_fixed_bit_field, extracts
2027    the bit field always using the MODE of OP0.  */
2028
2029 static rtx
2030 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
2031                            unsigned HOST_WIDE_INT bitsize,
2032                            unsigned HOST_WIDE_INT bitnum, rtx target,
2033                            int unsignedp, bool reverse)
2034 {
2035   machine_mode mode = GET_MODE (op0);
2036   gcc_assert (SCALAR_INT_MODE_P (mode));
2037
2038   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2039      for invalid input, such as extract equivalent of f5 from
2040      gcc.dg/pr48335-2.c.  */
2041
2042   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2043     /* BITNUM is the distance between our msb and that of OP0.
2044        Convert it to the distance from the lsb.  */
2045     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2046
2047   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2048      We have reduced the big-endian case to the little-endian case.  */
2049   if (reverse)
2050     op0 = flip_storage_order (mode, op0);
2051
2052   if (unsignedp)
2053     {
2054       if (bitnum)
2055         {
2056           /* If the field does not already start at the lsb,
2057              shift it so it does.  */
2058           /* Maybe propagate the target for the shift.  */
2059           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2060           if (tmode != mode)
2061             subtarget = 0;
2062           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2063         }
2064       /* Convert the value to the desired mode.  */
2065       if (mode != tmode)
2066         op0 = convert_to_mode (tmode, op0, 1);
2067
2068       /* Unless the msb of the field used to be the msb when we shifted,
2069          mask out the upper bits.  */
2070
2071       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2072         return expand_binop (GET_MODE (op0), and_optab, op0,
2073                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
2074                              target, 1, OPTAB_LIB_WIDEN);
2075       return op0;
2076     }
2077
2078   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2079      then arithmetic-shift its lsb to the lsb of the word.  */
2080   op0 = force_reg (mode, op0);
2081
2082   /* Find the narrowest integer mode that contains the field.  */
2083
2084   FOR_EACH_MODE_IN_CLASS (mode, MODE_INT)
2085     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
2086       {
2087         op0 = convert_to_mode (mode, op0, 0);
2088         break;
2089       }
2090
2091   if (mode != tmode)
2092     target = 0;
2093
2094   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2095     {
2096       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2097       /* Maybe propagate the target for the shift.  */
2098       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2099       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2100     }
2101
2102   return expand_shift (RSHIFT_EXPR, mode, op0,
2103                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2104 }
2105
2106 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2107    VALUE << BITPOS.  */
2108
2109 static rtx
2110 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2111               int bitpos)
2112 {
2113   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2114 }
2115 \f
2116 /* Extract a bit field that is split across two words
2117    and return an RTX for the result.
2118
2119    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2120    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2121    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2122
2123    If REVERSE is true, the extraction is to be done in reverse order.  */
2124
2125 static rtx
2126 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2127                          unsigned HOST_WIDE_INT bitpos, int unsignedp,
2128                          bool reverse)
2129 {
2130   unsigned int unit;
2131   unsigned int bitsdone = 0;
2132   rtx result = NULL_RTX;
2133   int first = 1;
2134
2135   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2136      much at a time.  */
2137   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2138     unit = BITS_PER_WORD;
2139   else
2140     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2141
2142   while (bitsdone < bitsize)
2143     {
2144       unsigned HOST_WIDE_INT thissize;
2145       rtx part, word;
2146       unsigned HOST_WIDE_INT thispos;
2147       unsigned HOST_WIDE_INT offset;
2148
2149       offset = (bitpos + bitsdone) / unit;
2150       thispos = (bitpos + bitsdone) % unit;
2151
2152       /* THISSIZE must not overrun a word boundary.  Otherwise,
2153          extract_fixed_bit_field will call us again, and we will mutually
2154          recurse forever.  */
2155       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2156       thissize = MIN (thissize, unit - thispos);
2157
2158       /* If OP0 is a register, then handle OFFSET here.  */
2159       if (SUBREG_P (op0) || REG_P (op0))
2160         {
2161           word = operand_subword_force (op0, offset, GET_MODE (op0));
2162           offset = 0;
2163         }
2164       else
2165         word = op0;
2166
2167       /* Extract the parts in bit-counting order,
2168          whose meaning is determined by BYTES_PER_UNIT.
2169          OFFSET is in UNITs, and UNIT is in bits.  */
2170       part = extract_fixed_bit_field (word_mode, word, thissize,
2171                                       offset * unit + thispos, 0, 1, reverse);
2172       bitsdone += thissize;
2173
2174       /* Shift this part into place for the result.  */
2175       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2176         {
2177           if (bitsize != bitsdone)
2178             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2179                                  bitsize - bitsdone, 0, 1);
2180         }
2181       else
2182         {
2183           if (bitsdone != thissize)
2184             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2185                                  bitsdone - thissize, 0, 1);
2186         }
2187
2188       if (first)
2189         result = part;
2190       else
2191         /* Combine the parts with bitwise or.  This works
2192            because we extracted each part as an unsigned bit field.  */
2193         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2194                                OPTAB_LIB_WIDEN);
2195
2196       first = 0;
2197     }
2198
2199   /* Unsigned bit field: we are done.  */
2200   if (unsignedp)
2201     return result;
2202   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2203   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2204                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2205   return expand_shift (RSHIFT_EXPR, word_mode, result,
2206                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2207 }
2208 \f
2209 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2210    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2211    MODE, fill the upper bits with zeros.  Fail if the layout of either
2212    mode is unknown (as for CC modes) or if the extraction would involve
2213    unprofitable mode punning.  Return the value on success, otherwise
2214    return null.
2215
2216    This is different from gen_lowpart* in these respects:
2217
2218      - the returned value must always be considered an rvalue
2219
2220      - when MODE is wider than SRC_MODE, the extraction involves
2221        a zero extension
2222
2223      - when MODE is smaller than SRC_MODE, the extraction involves
2224        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2225
2226    In other words, this routine performs a computation, whereas the
2227    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2228    operations.  */
2229
2230 rtx
2231 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2232 {
2233   machine_mode int_mode, src_int_mode;
2234
2235   if (mode == src_mode)
2236     return src;
2237
2238   if (CONSTANT_P (src))
2239     {
2240       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2241          fails, it will happily create (subreg (symbol_ref)) or similar
2242          invalid SUBREGs.  */
2243       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2244       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2245       if (ret)
2246         return ret;
2247
2248       if (GET_MODE (src) == VOIDmode
2249           || !validate_subreg (mode, src_mode, src, byte))
2250         return NULL_RTX;
2251
2252       src = force_reg (GET_MODE (src), src);
2253       return gen_rtx_SUBREG (mode, src, byte);
2254     }
2255
2256   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2257     return NULL_RTX;
2258
2259   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2260       && MODES_TIEABLE_P (mode, src_mode))
2261     {
2262       rtx x = gen_lowpart_common (mode, src);
2263       if (x)
2264         return x;
2265     }
2266
2267   src_int_mode = int_mode_for_mode (src_mode);
2268   int_mode = int_mode_for_mode (mode);
2269   if (src_int_mode == BLKmode || int_mode == BLKmode)
2270     return NULL_RTX;
2271
2272   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2273     return NULL_RTX;
2274   if (!MODES_TIEABLE_P (int_mode, mode))
2275     return NULL_RTX;
2276
2277   src = gen_lowpart (src_int_mode, src);
2278   src = convert_modes (int_mode, src_int_mode, src, true);
2279   src = gen_lowpart (mode, src);
2280   return src;
2281 }
2282 \f
2283 /* Add INC into TARGET.  */
2284
2285 void
2286 expand_inc (rtx target, rtx inc)
2287 {
2288   rtx value = expand_binop (GET_MODE (target), add_optab,
2289                             target, inc,
2290                             target, 0, OPTAB_LIB_WIDEN);
2291   if (value != target)
2292     emit_move_insn (target, value);
2293 }
2294
2295 /* Subtract DEC from TARGET.  */
2296
2297 void
2298 expand_dec (rtx target, rtx dec)
2299 {
2300   rtx value = expand_binop (GET_MODE (target), sub_optab,
2301                             target, dec,
2302                             target, 0, OPTAB_LIB_WIDEN);
2303   if (value != target)
2304     emit_move_insn (target, value);
2305 }
2306 \f
2307 /* Output a shift instruction for expression code CODE,
2308    with SHIFTED being the rtx for the value to shift,
2309    and AMOUNT the rtx for the amount to shift by.
2310    Store the result in the rtx TARGET, if that is convenient.
2311    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2312    Return the rtx for where the value is.
2313    If that cannot be done, abort the compilation unless MAY_FAIL is true,
2314    in which case 0 is returned.  */
2315
2316 static rtx
2317 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2318                 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2319 {
2320   rtx op1, temp = 0;
2321   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2322   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2323   optab lshift_optab = ashl_optab;
2324   optab rshift_arith_optab = ashr_optab;
2325   optab rshift_uns_optab = lshr_optab;
2326   optab lrotate_optab = rotl_optab;
2327   optab rrotate_optab = rotr_optab;
2328   machine_mode op1_mode;
2329   machine_mode scalar_mode = mode;
2330   int attempt;
2331   bool speed = optimize_insn_for_speed_p ();
2332
2333   if (VECTOR_MODE_P (mode))
2334     scalar_mode = GET_MODE_INNER (mode);
2335   op1 = amount;
2336   op1_mode = GET_MODE (op1);
2337
2338   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2339      shift amount is a vector, use the vector/vector shift patterns.  */
2340   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2341     {
2342       lshift_optab = vashl_optab;
2343       rshift_arith_optab = vashr_optab;
2344       rshift_uns_optab = vlshr_optab;
2345       lrotate_optab = vrotl_optab;
2346       rrotate_optab = vrotr_optab;
2347     }
2348
2349   /* Previously detected shift-counts computed by NEGATE_EXPR
2350      and shifted in the other direction; but that does not work
2351      on all machines.  */
2352
2353   if (SHIFT_COUNT_TRUNCATED)
2354     {
2355       if (CONST_INT_P (op1)
2356           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2357               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2358         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2359                        % GET_MODE_BITSIZE (scalar_mode));
2360       else if (GET_CODE (op1) == SUBREG
2361                && subreg_lowpart_p (op1)
2362                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2363                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2364         op1 = SUBREG_REG (op1);
2365     }
2366
2367   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2368      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2369      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2370      amount instead.  */
2371   if (rotate
2372       && CONST_INT_P (op1)
2373       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2374                    GET_MODE_BITSIZE (scalar_mode) - 1))
2375     {
2376       op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2377       left = !left;
2378       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2379     }
2380
2381   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2382      Note that this is not the case for bigger values.  For instance a rotation
2383      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2384      0x04030201 (bswapsi).  */
2385   if (rotate
2386       && CONST_INT_P (op1)
2387       && INTVAL (op1) == BITS_PER_UNIT
2388       && GET_MODE_SIZE (scalar_mode) == 2
2389       && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2390     return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2391                                   unsignedp);
2392
2393   if (op1 == const0_rtx)
2394     return shifted;
2395
2396   /* Check whether its cheaper to implement a left shift by a constant
2397      bit count by a sequence of additions.  */
2398   if (code == LSHIFT_EXPR
2399       && CONST_INT_P (op1)
2400       && INTVAL (op1) > 0
2401       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2402       && INTVAL (op1) < MAX_BITS_PER_WORD
2403       && (shift_cost (speed, mode, INTVAL (op1))
2404           > INTVAL (op1) * add_cost (speed, mode))
2405       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2406     {
2407       int i;
2408       for (i = 0; i < INTVAL (op1); i++)
2409         {
2410           temp = force_reg (mode, shifted);
2411           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2412                                   unsignedp, OPTAB_LIB_WIDEN);
2413         }
2414       return shifted;
2415     }
2416
2417   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2418     {
2419       enum optab_methods methods;
2420
2421       if (attempt == 0)
2422         methods = OPTAB_DIRECT;
2423       else if (attempt == 1)
2424         methods = OPTAB_WIDEN;
2425       else
2426         methods = OPTAB_LIB_WIDEN;
2427
2428       if (rotate)
2429         {
2430           /* Widening does not work for rotation.  */
2431           if (methods == OPTAB_WIDEN)
2432             continue;
2433           else if (methods == OPTAB_LIB_WIDEN)
2434             {
2435               /* If we have been unable to open-code this by a rotation,
2436                  do it as the IOR of two shifts.  I.e., to rotate A
2437                  by N bits, compute
2438                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2439                  where C is the bitsize of A.
2440
2441                  It is theoretically possible that the target machine might
2442                  not be able to perform either shift and hence we would
2443                  be making two libcalls rather than just the one for the
2444                  shift (similarly if IOR could not be done).  We will allow
2445                  this extremely unlikely lossage to avoid complicating the
2446                  code below.  */
2447
2448               rtx subtarget = target == shifted ? 0 : target;
2449               rtx new_amount, other_amount;
2450               rtx temp1;
2451
2452               new_amount = op1;
2453               if (op1 == const0_rtx)
2454                 return shifted;
2455               else if (CONST_INT_P (op1))
2456                 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2457                                         - INTVAL (op1));
2458               else
2459                 {
2460                   other_amount
2461                     = simplify_gen_unary (NEG, GET_MODE (op1),
2462                                           op1, GET_MODE (op1));
2463                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2464                   other_amount
2465                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2466                                            gen_int_mode (mask, GET_MODE (op1)));
2467                 }
2468
2469               shifted = force_reg (mode, shifted);
2470
2471               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2472                                      mode, shifted, new_amount, 0, 1);
2473               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2474                                       mode, shifted, other_amount,
2475                                       subtarget, 1);
2476               return expand_binop (mode, ior_optab, temp, temp1, target,
2477                                    unsignedp, methods);
2478             }
2479
2480           temp = expand_binop (mode,
2481                                left ? lrotate_optab : rrotate_optab,
2482                                shifted, op1, target, unsignedp, methods);
2483         }
2484       else if (unsignedp)
2485         temp = expand_binop (mode,
2486                              left ? lshift_optab : rshift_uns_optab,
2487                              shifted, op1, target, unsignedp, methods);
2488
2489       /* Do arithmetic shifts.
2490          Also, if we are going to widen the operand, we can just as well
2491          use an arithmetic right-shift instead of a logical one.  */
2492       if (temp == 0 && ! rotate
2493           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2494         {
2495           enum optab_methods methods1 = methods;
2496
2497           /* If trying to widen a log shift to an arithmetic shift,
2498              don't accept an arithmetic shift of the same size.  */
2499           if (unsignedp)
2500             methods1 = OPTAB_MUST_WIDEN;
2501
2502           /* Arithmetic shift */
2503
2504           temp = expand_binop (mode,
2505                                left ? lshift_optab : rshift_arith_optab,
2506                                shifted, op1, target, unsignedp, methods1);
2507         }
2508
2509       /* We used to try extzv here for logical right shifts, but that was
2510          only useful for one machine, the VAX, and caused poor code
2511          generation there for lshrdi3, so the code was deleted and a
2512          define_expand for lshrsi3 was added to vax.md.  */
2513     }
2514
2515   gcc_assert (temp != NULL_RTX || may_fail);
2516   return temp;
2517 }
2518
2519 /* Output a shift instruction for expression code CODE,
2520    with SHIFTED being the rtx for the value to shift,
2521    and AMOUNT the amount to shift by.
2522    Store the result in the rtx TARGET, if that is convenient.
2523    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2524    Return the rtx for where the value is.  */
2525
2526 rtx
2527 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2528               int amount, rtx target, int unsignedp)
2529 {
2530   return expand_shift_1 (code, mode,
2531                          shifted, GEN_INT (amount), target, unsignedp);
2532 }
2533
2534 /* Likewise, but return 0 if that cannot be done.  */
2535
2536 static rtx
2537 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2538                     int amount, rtx target, int unsignedp)
2539 {
2540   return expand_shift_1 (code, mode,
2541                          shifted, GEN_INT (amount), target, unsignedp, true);
2542 }
2543
2544 /* Output a shift instruction for expression code CODE,
2545    with SHIFTED being the rtx for the value to shift,
2546    and AMOUNT the tree for the amount to shift by.
2547    Store the result in the rtx TARGET, if that is convenient.
2548    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2549    Return the rtx for where the value is.  */
2550
2551 rtx
2552 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2553                        tree amount, rtx target, int unsignedp)
2554 {
2555   return expand_shift_1 (code, mode,
2556                          shifted, expand_normal (amount), target, unsignedp);
2557 }
2558
2559 \f
2560 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2561                         const struct mult_cost *, machine_mode mode);
2562 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2563                               const struct algorithm *, enum mult_variant);
2564 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2565 static rtx extract_high_half (machine_mode, rtx);
2566 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2567 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2568                                        int, int);
2569 /* Compute and return the best algorithm for multiplying by T.
2570    The algorithm must cost less than cost_limit
2571    If retval.cost >= COST_LIMIT, no algorithm was found and all
2572    other field of the returned struct are undefined.
2573    MODE is the machine mode of the multiplication.  */
2574
2575 static void
2576 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2577             const struct mult_cost *cost_limit, machine_mode mode)
2578 {
2579   int m;
2580   struct algorithm *alg_in, *best_alg;
2581   struct mult_cost best_cost;
2582   struct mult_cost new_limit;
2583   int op_cost, op_latency;
2584   unsigned HOST_WIDE_INT orig_t = t;
2585   unsigned HOST_WIDE_INT q;
2586   int maxm, hash_index;
2587   bool cache_hit = false;
2588   enum alg_code cache_alg = alg_zero;
2589   bool speed = optimize_insn_for_speed_p ();
2590   machine_mode imode;
2591   struct alg_hash_entry *entry_ptr;
2592
2593   /* Indicate that no algorithm is yet found.  If no algorithm
2594      is found, this value will be returned and indicate failure.  */
2595   alg_out->cost.cost = cost_limit->cost + 1;
2596   alg_out->cost.latency = cost_limit->latency + 1;
2597
2598   if (cost_limit->cost < 0
2599       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2600     return;
2601
2602   /* Be prepared for vector modes.  */
2603   imode = GET_MODE_INNER (mode);
2604
2605   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2606
2607   /* Restrict the bits of "t" to the multiplication's mode.  */
2608   t &= GET_MODE_MASK (imode);
2609
2610   /* t == 1 can be done in zero cost.  */
2611   if (t == 1)
2612     {
2613       alg_out->ops = 1;
2614       alg_out->cost.cost = 0;
2615       alg_out->cost.latency = 0;
2616       alg_out->op[0] = alg_m;
2617       return;
2618     }
2619
2620   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2621      fail now.  */
2622   if (t == 0)
2623     {
2624       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2625         return;
2626       else
2627         {
2628           alg_out->ops = 1;
2629           alg_out->cost.cost = zero_cost (speed);
2630           alg_out->cost.latency = zero_cost (speed);
2631           alg_out->op[0] = alg_zero;
2632           return;
2633         }
2634     }
2635
2636   /* We'll be needing a couple extra algorithm structures now.  */
2637
2638   alg_in = XALLOCA (struct algorithm);
2639   best_alg = XALLOCA (struct algorithm);
2640   best_cost = *cost_limit;
2641
2642   /* Compute the hash index.  */
2643   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2644
2645   /* See if we already know what to do for T.  */
2646   entry_ptr = alg_hash_entry_ptr (hash_index);
2647   if (entry_ptr->t == t
2648       && entry_ptr->mode == mode
2649       && entry_ptr->speed == speed
2650       && entry_ptr->alg != alg_unknown)
2651     {
2652       cache_alg = entry_ptr->alg;
2653
2654       if (cache_alg == alg_impossible)
2655         {
2656           /* The cache tells us that it's impossible to synthesize
2657              multiplication by T within entry_ptr->cost.  */
2658           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2659             /* COST_LIMIT is at least as restrictive as the one
2660                recorded in the hash table, in which case we have no
2661                hope of synthesizing a multiplication.  Just
2662                return.  */
2663             return;
2664
2665           /* If we get here, COST_LIMIT is less restrictive than the
2666              one recorded in the hash table, so we may be able to
2667              synthesize a multiplication.  Proceed as if we didn't
2668              have the cache entry.  */
2669         }
2670       else
2671         {
2672           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2673             /* The cached algorithm shows that this multiplication
2674                requires more cost than COST_LIMIT.  Just return.  This
2675                way, we don't clobber this cache entry with
2676                alg_impossible but retain useful information.  */
2677             return;
2678
2679           cache_hit = true;
2680
2681           switch (cache_alg)
2682             {
2683             case alg_shift:
2684               goto do_alg_shift;
2685
2686             case alg_add_t_m2:
2687             case alg_sub_t_m2:
2688               goto do_alg_addsub_t_m2;
2689
2690             case alg_add_factor:
2691             case alg_sub_factor:
2692               goto do_alg_addsub_factor;
2693
2694             case alg_add_t2_m:
2695               goto do_alg_add_t2_m;
2696
2697             case alg_sub_t2_m:
2698               goto do_alg_sub_t2_m;
2699
2700             default:
2701               gcc_unreachable ();
2702             }
2703         }
2704     }
2705
2706   /* If we have a group of zero bits at the low-order part of T, try
2707      multiplying by the remaining bits and then doing a shift.  */
2708
2709   if ((t & 1) == 0)
2710     {
2711     do_alg_shift:
2712       m = ctz_or_zero (t); /* m = number of low zero bits */
2713       if (m < maxm)
2714         {
2715           q = t >> m;
2716           /* The function expand_shift will choose between a shift and
2717              a sequence of additions, so the observed cost is given as
2718              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2719           op_cost = m * add_cost (speed, mode);
2720           if (shift_cost (speed, mode, m) < op_cost)
2721             op_cost = shift_cost (speed, mode, m);
2722           new_limit.cost = best_cost.cost - op_cost;
2723           new_limit.latency = best_cost.latency - op_cost;
2724           synth_mult (alg_in, q, &new_limit, mode);
2725
2726           alg_in->cost.cost += op_cost;
2727           alg_in->cost.latency += op_cost;
2728           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2729             {
2730               best_cost = alg_in->cost;
2731               std::swap (alg_in, best_alg);
2732               best_alg->log[best_alg->ops] = m;
2733               best_alg->op[best_alg->ops] = alg_shift;
2734             }
2735
2736           /* See if treating ORIG_T as a signed number yields a better
2737              sequence.  Try this sequence only for a negative ORIG_T
2738              as it would be useless for a non-negative ORIG_T.  */
2739           if ((HOST_WIDE_INT) orig_t < 0)
2740             {
2741               /* Shift ORIG_T as follows because a right shift of a
2742                  negative-valued signed type is implementation
2743                  defined.  */
2744               q = ~(~orig_t >> m);
2745               /* The function expand_shift will choose between a shift
2746                  and a sequence of additions, so the observed cost is
2747                  given as MIN (m * add_cost(speed, mode),
2748                  shift_cost(speed, mode, m)).  */
2749               op_cost = m * add_cost (speed, mode);
2750               if (shift_cost (speed, mode, m) < op_cost)
2751                 op_cost = shift_cost (speed, mode, m);
2752               new_limit.cost = best_cost.cost - op_cost;
2753               new_limit.latency = best_cost.latency - op_cost;
2754               synth_mult (alg_in, q, &new_limit, mode);
2755
2756               alg_in->cost.cost += op_cost;
2757               alg_in->cost.latency += op_cost;
2758               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2759                 {
2760                   best_cost = alg_in->cost;
2761                   std::swap (alg_in, best_alg);
2762                   best_alg->log[best_alg->ops] = m;
2763                   best_alg->op[best_alg->ops] = alg_shift;
2764                 }
2765             }
2766         }
2767       if (cache_hit)
2768         goto done;
2769     }
2770
2771   /* If we have an odd number, add or subtract one.  */
2772   if ((t & 1) != 0)
2773     {
2774       unsigned HOST_WIDE_INT w;
2775
2776     do_alg_addsub_t_m2:
2777       for (w = 1; (w & t) != 0; w <<= 1)
2778         ;
2779       /* If T was -1, then W will be zero after the loop.  This is another
2780          case where T ends with ...111.  Handling this with (T + 1) and
2781          subtract 1 produces slightly better code and results in algorithm
2782          selection much faster than treating it like the ...0111 case
2783          below.  */
2784       if (w == 0
2785           || (w > 2
2786               /* Reject the case where t is 3.
2787                  Thus we prefer addition in that case.  */
2788               && t != 3))
2789         {
2790           /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */
2791
2792           op_cost = add_cost (speed, mode);
2793           new_limit.cost = best_cost.cost - op_cost;
2794           new_limit.latency = best_cost.latency - op_cost;
2795           synth_mult (alg_in, t + 1, &new_limit, mode);
2796
2797           alg_in->cost.cost += op_cost;
2798           alg_in->cost.latency += op_cost;
2799           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2800             {
2801               best_cost = alg_in->cost;
2802               std::swap (alg_in, best_alg);
2803               best_alg->log[best_alg->ops] = 0;
2804               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2805             }
2806         }
2807       else
2808         {
2809           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */
2810
2811           op_cost = add_cost (speed, mode);
2812           new_limit.cost = best_cost.cost - op_cost;
2813           new_limit.latency = best_cost.latency - op_cost;
2814           synth_mult (alg_in, t - 1, &new_limit, mode);
2815
2816           alg_in->cost.cost += op_cost;
2817           alg_in->cost.latency += op_cost;
2818           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2819             {
2820               best_cost = alg_in->cost;
2821               std::swap (alg_in, best_alg);
2822               best_alg->log[best_alg->ops] = 0;
2823               best_alg->op[best_alg->ops] = alg_add_t_m2;
2824             }
2825         }
2826
2827       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2828          quickly with a - a * n for some appropriate constant n.  */
2829       m = exact_log2 (-orig_t + 1);
2830       if (m >= 0 && m < maxm)
2831         {
2832           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2833           /* If the target has a cheap shift-and-subtract insn use
2834              that in preference to a shift insn followed by a sub insn.
2835              Assume that the shift-and-sub is "atomic" with a latency
2836              equal to it's cost, otherwise assume that on superscalar
2837              hardware the shift may be executed concurrently with the
2838              earlier steps in the algorithm.  */
2839           if (shiftsub1_cost (speed, mode, m) <= op_cost)
2840             {
2841               op_cost = shiftsub1_cost (speed, mode, m);
2842               op_latency = op_cost;
2843             }
2844           else
2845             op_latency = add_cost (speed, mode);
2846
2847           new_limit.cost = best_cost.cost - op_cost;
2848           new_limit.latency = best_cost.latency - op_latency;
2849           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2850                       &new_limit, mode);
2851
2852           alg_in->cost.cost += op_cost;
2853           alg_in->cost.latency += op_latency;
2854           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2855             {
2856               best_cost = alg_in->cost;
2857               std::swap (alg_in, best_alg);
2858               best_alg->log[best_alg->ops] = m;
2859               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2860             }
2861         }
2862
2863       if (cache_hit)
2864         goto done;
2865     }
2866
2867   /* Look for factors of t of the form
2868      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2869      If we find such a factor, we can multiply by t using an algorithm that
2870      multiplies by q, shift the result by m and add/subtract it to itself.
2871
2872      We search for large factors first and loop down, even if large factors
2873      are less probable than small; if we find a large factor we will find a
2874      good sequence quickly, and therefore be able to prune (by decreasing
2875      COST_LIMIT) the search.  */
2876
2877  do_alg_addsub_factor:
2878   for (m = floor_log2 (t - 1); m >= 2; m--)
2879     {
2880       unsigned HOST_WIDE_INT d;
2881
2882       d = (HOST_WIDE_INT_1U << m) + 1;
2883       if (t % d == 0 && t > d && m < maxm
2884           && (!cache_hit || cache_alg == alg_add_factor))
2885         {
2886           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2887           if (shiftadd_cost (speed, mode, m) <= op_cost)
2888             op_cost = shiftadd_cost (speed, mode, m);
2889
2890           op_latency = op_cost;
2891
2892
2893           new_limit.cost = best_cost.cost - op_cost;
2894           new_limit.latency = best_cost.latency - op_latency;
2895           synth_mult (alg_in, t / d, &new_limit, mode);
2896
2897           alg_in->cost.cost += op_cost;
2898           alg_in->cost.latency += op_latency;
2899           if (alg_in->cost.latency < op_cost)
2900             alg_in->cost.latency = op_cost;
2901           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2902             {
2903               best_cost = alg_in->cost;
2904               std::swap (alg_in, best_alg);
2905               best_alg->log[best_alg->ops] = m;
2906               best_alg->op[best_alg->ops] = alg_add_factor;
2907             }
2908           /* Other factors will have been taken care of in the recursion.  */
2909           break;
2910         }
2911
2912       d = (HOST_WIDE_INT_1U << m) - 1;
2913       if (t % d == 0 && t > d && m < maxm
2914           && (!cache_hit || cache_alg == alg_sub_factor))
2915         {
2916           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2917           if (shiftsub0_cost (speed, mode, m) <= op_cost)
2918             op_cost = shiftsub0_cost (speed, mode, m);
2919
2920           op_latency = op_cost;
2921
2922           new_limit.cost = best_cost.cost - op_cost;
2923           new_limit.latency = best_cost.latency - op_latency;
2924           synth_mult (alg_in, t / d, &new_limit, mode);
2925
2926           alg_in->cost.cost += op_cost;
2927           alg_in->cost.latency += op_latency;
2928           if (alg_in->cost.latency < op_cost)
2929             alg_in->cost.latency = op_cost;
2930           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2931             {
2932               best_cost = alg_in->cost;
2933               std::swap (alg_in, best_alg);
2934               best_alg->log[best_alg->ops] = m;
2935               best_alg->op[best_alg->ops] = alg_sub_factor;
2936             }
2937           break;
2938         }
2939     }
2940   if (cache_hit)
2941     goto done;
2942
2943   /* Try shift-and-add (load effective address) instructions,
2944      i.e. do a*3, a*5, a*9.  */
2945   if ((t & 1) != 0)
2946     {
2947     do_alg_add_t2_m:
2948       q = t - 1;
2949       m = ctz_hwi (q);
2950       if (q && m < maxm)
2951         {
2952           op_cost = shiftadd_cost (speed, mode, m);
2953           new_limit.cost = best_cost.cost - op_cost;
2954           new_limit.latency = best_cost.latency - op_cost;
2955           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2956
2957           alg_in->cost.cost += op_cost;
2958           alg_in->cost.latency += op_cost;
2959           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2960             {
2961               best_cost = alg_in->cost;
2962               std::swap (alg_in, best_alg);
2963               best_alg->log[best_alg->ops] = m;
2964               best_alg->op[best_alg->ops] = alg_add_t2_m;
2965             }
2966         }
2967       if (cache_hit)
2968         goto done;
2969
2970     do_alg_sub_t2_m:
2971       q = t + 1;
2972       m = ctz_hwi (q);
2973       if (q && m < maxm)
2974         {
2975           op_cost = shiftsub0_cost (speed, mode, m);
2976           new_limit.cost = best_cost.cost - op_cost;
2977           new_limit.latency = best_cost.latency - op_cost;
2978           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2979
2980           alg_in->cost.cost += op_cost;
2981           alg_in->cost.latency += op_cost;
2982           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2983             {
2984               best_cost = alg_in->cost;
2985               std::swap (alg_in, best_alg);
2986               best_alg->log[best_alg->ops] = m;
2987               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2988             }
2989         }
2990       if (cache_hit)
2991         goto done;
2992     }
2993
2994  done:
2995   /* If best_cost has not decreased, we have not found any algorithm.  */
2996   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2997     {
2998       /* We failed to find an algorithm.  Record alg_impossible for
2999          this case (that is, <T, MODE, COST_LIMIT>) so that next time
3000          we are asked to find an algorithm for T within the same or
3001          lower COST_LIMIT, we can immediately return to the
3002          caller.  */
3003       entry_ptr->t = t;
3004       entry_ptr->mode = mode;
3005       entry_ptr->speed = speed;
3006       entry_ptr->alg = alg_impossible;
3007       entry_ptr->cost = *cost_limit;
3008       return;
3009     }
3010
3011   /* Cache the result.  */
3012   if (!cache_hit)
3013     {
3014       entry_ptr->t = t;
3015       entry_ptr->mode = mode;
3016       entry_ptr->speed = speed;
3017       entry_ptr->alg = best_alg->op[best_alg->ops];
3018       entry_ptr->cost.cost = best_cost.cost;
3019       entry_ptr->cost.latency = best_cost.latency;
3020     }
3021
3022   /* If we are getting a too long sequence for `struct algorithm'
3023      to record, make this search fail.  */
3024   if (best_alg->ops == MAX_BITS_PER_WORD)
3025     return;
3026
3027   /* Copy the algorithm from temporary space to the space at alg_out.
3028      We avoid using structure assignment because the majority of
3029      best_alg is normally undefined, and this is a critical function.  */
3030   alg_out->ops = best_alg->ops + 1;
3031   alg_out->cost = best_cost;
3032   memcpy (alg_out->op, best_alg->op,
3033           alg_out->ops * sizeof *alg_out->op);
3034   memcpy (alg_out->log, best_alg->log,
3035           alg_out->ops * sizeof *alg_out->log);
3036 }
3037 \f
3038 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3039    Try three variations:
3040
3041        - a shift/add sequence based on VAL itself
3042        - a shift/add sequence based on -VAL, followed by a negation
3043        - a shift/add sequence based on VAL - 1, followed by an addition.
3044
3045    Return true if the cheapest of these cost less than MULT_COST,
3046    describing the algorithm in *ALG and final fixup in *VARIANT.  */
3047
3048 bool
3049 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3050                      struct algorithm *alg, enum mult_variant *variant,
3051                      int mult_cost)
3052 {
3053   struct algorithm alg2;
3054   struct mult_cost limit;
3055   int op_cost;
3056   bool speed = optimize_insn_for_speed_p ();
3057
3058   /* Fail quickly for impossible bounds.  */
3059   if (mult_cost < 0)
3060     return false;
3061
3062   /* Ensure that mult_cost provides a reasonable upper bound.
3063      Any constant multiplication can be performed with less
3064      than 2 * bits additions.  */
3065   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3066   if (mult_cost > op_cost)
3067     mult_cost = op_cost;
3068
3069   *variant = basic_variant;
3070   limit.cost = mult_cost;
3071   limit.latency = mult_cost;
3072   synth_mult (alg, val, &limit, mode);
3073
3074   /* This works only if the inverted value actually fits in an
3075      `unsigned int' */
3076   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3077     {
3078       op_cost = neg_cost (speed, mode);
3079       if (MULT_COST_LESS (&alg->cost, mult_cost))
3080         {
3081           limit.cost = alg->cost.cost - op_cost;
3082           limit.latency = alg->cost.latency - op_cost;
3083         }
3084       else
3085         {
3086           limit.cost = mult_cost - op_cost;
3087           limit.latency = mult_cost - op_cost;
3088         }
3089
3090       synth_mult (&alg2, -val, &limit, mode);
3091       alg2.cost.cost += op_cost;
3092       alg2.cost.latency += op_cost;
3093       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3094         *alg = alg2, *variant = negate_variant;
3095     }
3096
3097   /* This proves very useful for division-by-constant.  */
3098   op_cost = add_cost (speed, mode);
3099   if (MULT_COST_LESS (&alg->cost, mult_cost))
3100     {
3101       limit.cost = alg->cost.cost - op_cost;
3102       limit.latency = alg->cost.latency - op_cost;
3103     }
3104   else
3105     {
3106       limit.cost = mult_cost - op_cost;
3107       limit.latency = mult_cost - op_cost;
3108     }
3109
3110   synth_mult (&alg2, val - 1, &limit, mode);
3111   alg2.cost.cost += op_cost;
3112   alg2.cost.latency += op_cost;
3113   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3114     *alg = alg2, *variant = add_variant;
3115
3116   return MULT_COST_LESS (&alg->cost, mult_cost);
3117 }
3118
3119 /* A subroutine of expand_mult, used for constant multiplications.
3120    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3121    convenient.  Use the shift/add sequence described by ALG and apply
3122    the final fixup specified by VARIANT.  */
3123
3124 static rtx
3125 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3126                    rtx target, const struct algorithm *alg,
3127                    enum mult_variant variant)
3128 {
3129   unsigned HOST_WIDE_INT val_so_far;
3130   rtx_insn *insn;
3131   rtx accum, tem;
3132   int opno;
3133   machine_mode nmode;
3134
3135   /* Avoid referencing memory over and over and invalid sharing
3136      on SUBREGs.  */
3137   op0 = force_reg (mode, op0);
3138
3139   /* ACCUM starts out either as OP0 or as a zero, depending on
3140      the first operation.  */
3141
3142   if (alg->op[0] == alg_zero)
3143     {
3144       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3145       val_so_far = 0;
3146     }
3147   else if (alg->op[0] == alg_m)
3148     {
3149       accum = copy_to_mode_reg (mode, op0);
3150       val_so_far = 1;
3151     }
3152   else
3153     gcc_unreachable ();
3154
3155   for (opno = 1; opno < alg->ops; opno++)
3156     {
3157       int log = alg->log[opno];
3158       rtx shift_subtarget = optimize ? 0 : accum;
3159       rtx add_target
3160         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3161            && !optimize)
3162           ? target : 0;
3163       rtx accum_target = optimize ? 0 : accum;
3164       rtx accum_inner;
3165
3166       switch (alg->op[opno])
3167         {
3168         case alg_shift:
3169           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3170           /* REG_EQUAL note will be attached to the following insn.  */
3171           emit_move_insn (accum, tem);
3172           val_so_far <<= log;
3173           break;
3174
3175         case alg_add_t_m2:
3176           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3177           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3178                                  add_target ? add_target : accum_target);
3179           val_so_far += HOST_WIDE_INT_1U << log;
3180           break;
3181
3182         case alg_sub_t_m2:
3183           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3184           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3185                                  add_target ? add_target : accum_target);
3186           val_so_far -= HOST_WIDE_INT_1U << log;
3187           break;
3188
3189         case alg_add_t2_m:
3190           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3191                                 log, shift_subtarget, 0);
3192           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3193                                  add_target ? add_target : accum_target);
3194           val_so_far = (val_so_far << log) + 1;
3195           break;
3196
3197         case alg_sub_t2_m:
3198           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3199                                 log, shift_subtarget, 0);
3200           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3201                                  add_target ? add_target : accum_target);
3202           val_so_far = (val_so_far << log) - 1;
3203           break;
3204
3205         case alg_add_factor:
3206           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3207           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3208                                  add_target ? add_target : accum_target);
3209           val_so_far += val_so_far << log;
3210           break;
3211
3212         case alg_sub_factor:
3213           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3214           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3215                                  (add_target
3216                                   ? add_target : (optimize ? 0 : tem)));
3217           val_so_far = (val_so_far << log) - val_so_far;
3218           break;
3219
3220         default:
3221           gcc_unreachable ();
3222         }
3223
3224       if (SCALAR_INT_MODE_P (mode))
3225         {
3226           /* Write a REG_EQUAL note on the last insn so that we can cse
3227              multiplication sequences.  Note that if ACCUM is a SUBREG,
3228              we've set the inner register and must properly indicate that.  */
3229           tem = op0, nmode = mode;
3230           accum_inner = accum;
3231           if (GET_CODE (accum) == SUBREG)
3232             {
3233               accum_inner = SUBREG_REG (accum);
3234               nmode = GET_MODE (accum_inner);
3235               tem = gen_lowpart (nmode, op0);
3236             }
3237
3238           insn = get_last_insn ();
3239           set_dst_reg_note (insn, REG_EQUAL,
3240                             gen_rtx_MULT (nmode, tem,
3241                                           gen_int_mode (val_so_far, nmode)),
3242                             accum_inner);
3243         }
3244     }
3245
3246   if (variant == negate_variant)
3247     {
3248       val_so_far = -val_so_far;
3249       accum = expand_unop (mode, neg_optab, accum, target, 0);
3250     }
3251   else if (variant == add_variant)
3252     {
3253       val_so_far = val_so_far + 1;
3254       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3255     }
3256
3257   /* Compare only the bits of val and val_so_far that are significant
3258      in the result mode, to avoid sign-/zero-extension confusion.  */
3259   nmode = GET_MODE_INNER (mode);
3260   val &= GET_MODE_MASK (nmode);
3261   val_so_far &= GET_MODE_MASK (nmode);
3262   gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3263
3264   return accum;
3265 }
3266
3267 /* Perform a multiplication and return an rtx for the result.
3268    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3269    TARGET is a suggestion for where to store the result (an rtx).
3270
3271    We check specially for a constant integer as OP1.
3272    If you want this check for OP0 as well, then before calling
3273    you should swap the two operands if OP0 would be constant.  */
3274
3275 rtx
3276 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3277              int unsignedp)
3278 {
3279   enum mult_variant variant;
3280   struct algorithm algorithm;
3281   rtx scalar_op1;
3282   int max_cost;
3283   bool speed = optimize_insn_for_speed_p ();
3284   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3285
3286   if (CONSTANT_P (op0))
3287     std::swap (op0, op1);
3288
3289   /* For vectors, there are several simplifications that can be made if
3290      all elements of the vector constant are identical.  */
3291   scalar_op1 = unwrap_const_vec_duplicate (op1);
3292
3293   if (INTEGRAL_MODE_P (mode))
3294     {
3295       rtx fake_reg;
3296       HOST_WIDE_INT coeff;
3297       bool is_neg;
3298       int mode_bitsize;
3299
3300       if (op1 == CONST0_RTX (mode))
3301         return op1;
3302       if (op1 == CONST1_RTX (mode))
3303         return op0;
3304       if (op1 == CONSTM1_RTX (mode))
3305         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3306                             op0, target, 0);
3307
3308       if (do_trapv)
3309         goto skip_synth;
3310
3311       /* If mode is integer vector mode, check if the backend supports
3312          vector lshift (by scalar or vector) at all.  If not, we can't use
3313          synthetized multiply.  */
3314       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3315           && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3316           && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3317         goto skip_synth;
3318
3319       /* These are the operations that are potentially turned into
3320          a sequence of shifts and additions.  */
3321       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3322
3323       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3324          less than or equal in size to `unsigned int' this doesn't matter.
3325          If the mode is larger than `unsigned int', then synth_mult works
3326          only if the constant value exactly fits in an `unsigned int' without
3327          any truncation.  This means that multiplying by negative values does
3328          not work; results are off by 2^32 on a 32 bit machine.  */
3329       if (CONST_INT_P (scalar_op1))
3330         {
3331           coeff = INTVAL (scalar_op1);
3332           is_neg = coeff < 0;
3333         }
3334 #if TARGET_SUPPORTS_WIDE_INT
3335       else if (CONST_WIDE_INT_P (scalar_op1))
3336 #else
3337       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3338 #endif
3339         {
3340           int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3341           /* Perfect power of 2 (other than 1, which is handled above).  */
3342           if (shift > 0)
3343             return expand_shift (LSHIFT_EXPR, mode, op0,
3344                                  shift, target, unsignedp);
3345           else
3346             goto skip_synth;
3347         }
3348       else
3349         goto skip_synth;
3350
3351       /* We used to test optimize here, on the grounds that it's better to
3352          produce a smaller program when -O is not used.  But this causes
3353          such a terrible slowdown sometimes that it seems better to always
3354          use synth_mult.  */
3355
3356       /* Special case powers of two.  */
3357       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3358           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3359         return expand_shift (LSHIFT_EXPR, mode, op0,
3360                              floor_log2 (coeff), target, unsignedp);
3361
3362       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3363
3364       /* Attempt to handle multiplication of DImode values by negative
3365          coefficients, by performing the multiplication by a positive
3366          multiplier and then inverting the result.  */
3367       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3368         {
3369           /* Its safe to use -coeff even for INT_MIN, as the
3370              result is interpreted as an unsigned coefficient.
3371              Exclude cost of op0 from max_cost to match the cost
3372              calculation of the synth_mult.  */
3373           coeff = -(unsigned HOST_WIDE_INT) coeff;
3374           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3375                                     mode, speed)
3376                       - neg_cost (speed, mode));
3377           if (max_cost <= 0)
3378             goto skip_synth;
3379
3380           /* Special case powers of two.  */
3381           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3382             {
3383               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3384                                        floor_log2 (coeff), target, unsignedp);
3385               return expand_unop (mode, neg_optab, temp, target, 0);
3386             }
3387
3388           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3389                                    max_cost))
3390             {
3391               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3392                                             &algorithm, variant);
3393               return expand_unop (mode, neg_optab, temp, target, 0);
3394             }
3395           goto skip_synth;
3396         }
3397
3398       /* Exclude cost of op0 from max_cost to match the cost
3399          calculation of the synth_mult.  */
3400       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3401       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3402         return expand_mult_const (mode, op0, coeff, target,
3403                                   &algorithm, variant);
3404     }
3405  skip_synth:
3406
3407   /* Expand x*2.0 as x+x.  */
3408   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3409       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3410     {
3411       op0 = force_reg (GET_MODE (op0), op0);
3412       return expand_binop (mode, add_optab, op0, op0,
3413                            target, unsignedp, OPTAB_LIB_WIDEN);
3414     }
3415
3416   /* This used to use umul_optab if unsigned, but for non-widening multiply
3417      there is no difference between signed and unsigned.  */
3418   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3419                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3420   gcc_assert (op0);
3421   return op0;
3422 }
3423
3424 /* Return a cost estimate for multiplying a register by the given
3425    COEFFicient in the given MODE and SPEED.  */
3426
3427 int
3428 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3429 {
3430   int max_cost;
3431   struct algorithm algorithm;
3432   enum mult_variant variant;
3433
3434   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3435   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3436                            mode, speed);
3437   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3438     return algorithm.cost.cost;
3439   else
3440     return max_cost;
3441 }
3442
3443 /* Perform a widening multiplication and return an rtx for the result.
3444    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3445    TARGET is a suggestion for where to store the result (an rtx).
3446    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3447    or smul_widen_optab.
3448
3449    We check specially for a constant integer as OP1, comparing the
3450    cost of a widening multiply against the cost of a sequence of shifts
3451    and adds.  */
3452
3453 rtx
3454 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3455                       int unsignedp, optab this_optab)
3456 {
3457   bool speed = optimize_insn_for_speed_p ();
3458   rtx cop1;
3459
3460   if (CONST_INT_P (op1)
3461       && GET_MODE (op0) != VOIDmode
3462       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3463                                 this_optab == umul_widen_optab))
3464       && CONST_INT_P (cop1)
3465       && (INTVAL (cop1) >= 0
3466           || HWI_COMPUTABLE_MODE_P (mode)))
3467     {
3468       HOST_WIDE_INT coeff = INTVAL (cop1);
3469       int max_cost;
3470       enum mult_variant variant;
3471       struct algorithm algorithm;
3472
3473       if (coeff == 0)
3474         return CONST0_RTX (mode);
3475
3476       /* Special case powers of two.  */
3477       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3478         {
3479           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3480           return expand_shift (LSHIFT_EXPR, mode, op0,
3481                                floor_log2 (coeff), target, unsignedp);
3482         }
3483
3484       /* Exclude cost of op0 from max_cost to match the cost
3485          calculation of the synth_mult.  */
3486       max_cost = mul_widen_cost (speed, mode);
3487       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3488                                max_cost))
3489         {
3490           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3491           return expand_mult_const (mode, op0, coeff, target,
3492                                     &algorithm, variant);
3493         }
3494     }
3495   return expand_binop (mode, this_optab, op0, op1, target,
3496                        unsignedp, OPTAB_LIB_WIDEN);
3497 }
3498 \f
3499 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3500    replace division by D, and put the least significant N bits of the result
3501    in *MULTIPLIER_PTR and return the most significant bit.
3502
3503    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3504    needed precision is in PRECISION (should be <= N).
3505
3506    PRECISION should be as small as possible so this function can choose
3507    multiplier more freely.
3508
3509    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3510    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3511
3512    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3513    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3514
3515 unsigned HOST_WIDE_INT
3516 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3517                    unsigned HOST_WIDE_INT *multiplier_ptr,
3518                    int *post_shift_ptr, int *lgup_ptr)
3519 {
3520   int lgup, post_shift;
3521   int pow, pow2;
3522
3523   /* lgup = ceil(log2(divisor)); */
3524   lgup = ceil_log2 (d);
3525
3526   gcc_assert (lgup <= n);
3527
3528   pow = n + lgup;
3529   pow2 = n + lgup - precision;
3530
3531   /* mlow = 2^(N + lgup)/d */
3532   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3533   wide_int mlow = wi::udiv_trunc (val, d);
3534
3535   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3536   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3537   wide_int mhigh = wi::udiv_trunc (val, d);
3538
3539   /* If precision == N, then mlow, mhigh exceed 2^N
3540      (but they do not exceed 2^(N+1)).  */
3541
3542   /* Reduce to lowest terms.  */
3543   for (post_shift = lgup; post_shift > 0; post_shift--)
3544     {
3545       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3546                                                        HOST_BITS_PER_WIDE_INT);
3547       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3548                                                        HOST_BITS_PER_WIDE_INT);
3549       if (ml_lo >= mh_lo)
3550         break;
3551
3552       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3553       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3554     }
3555
3556   *post_shift_ptr = post_shift;
3557   *lgup_ptr = lgup;
3558   if (n < HOST_BITS_PER_WIDE_INT)
3559     {
3560       unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3561       *multiplier_ptr = mhigh.to_uhwi () & mask;
3562       return mhigh.to_uhwi () >= mask;
3563     }
3564   else
3565     {
3566       *multiplier_ptr = mhigh.to_uhwi ();
3567       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3568     }
3569 }
3570
3571 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3572    congruent to 1 (mod 2**N).  */
3573
3574 static unsigned HOST_WIDE_INT
3575 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3576 {
3577   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3578
3579   /* The algorithm notes that the choice y = x satisfies
3580      x*y == 1 mod 2^3, since x is assumed odd.
3581      Each iteration doubles the number of bits of significance in y.  */
3582
3583   unsigned HOST_WIDE_INT mask;
3584   unsigned HOST_WIDE_INT y = x;
3585   int nbit = 3;
3586
3587   mask = (n == HOST_BITS_PER_WIDE_INT
3588           ? HOST_WIDE_INT_M1U
3589           : (HOST_WIDE_INT_1U << n) - 1);
3590
3591   while (nbit < n)
3592     {
3593       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3594       nbit *= 2;
3595     }
3596   return y;
3597 }
3598
3599 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3600    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3601    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3602    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3603    become signed.
3604
3605    The result is put in TARGET if that is convenient.
3606
3607    MODE is the mode of operation.  */
3608
3609 rtx
3610 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3611                              rtx op1, rtx target, int unsignedp)
3612 {
3613   rtx tem;
3614   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3615
3616   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3617                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3618   tem = expand_and (mode, tem, op1, NULL_RTX);
3619   adj_operand
3620     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3621                      adj_operand);
3622
3623   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3624                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3625   tem = expand_and (mode, tem, op0, NULL_RTX);
3626   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3627                           target);
3628
3629   return target;
3630 }
3631
3632 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3633
3634 static rtx
3635 extract_high_half (machine_mode mode, rtx op)
3636 {
3637   machine_mode wider_mode;
3638
3639   if (mode == word_mode)
3640     return gen_highpart (mode, op);
3641
3642   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3643
3644   wider_mode = GET_MODE_WIDER_MODE (mode);
3645   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3646                      GET_MODE_BITSIZE (mode), 0, 1);
3647   return convert_modes (mode, wider_mode, op, 0);
3648 }
3649
3650 /* Like expmed_mult_highpart, but only consider using a multiplication
3651    optab.  OP1 is an rtx for the constant operand.  */
3652
3653 static rtx
3654 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3655                             rtx target, int unsignedp, int max_cost)
3656 {
3657   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3658   machine_mode wider_mode;
3659   optab moptab;
3660   rtx tem;
3661   int size;
3662   bool speed = optimize_insn_for_speed_p ();
3663
3664   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3665
3666   wider_mode = GET_MODE_WIDER_MODE (mode);
3667   size = GET_MODE_BITSIZE (mode);
3668
3669   /* Firstly, try using a multiplication insn that only generates the needed
3670      high part of the product, and in the sign flavor of unsignedp.  */
3671   if (mul_highpart_cost (speed, mode) < max_cost)
3672     {
3673       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3674       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3675                           unsignedp, OPTAB_DIRECT);
3676       if (tem)
3677         return tem;
3678     }
3679
3680   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3681      Need to adjust the result after the multiplication.  */
3682   if (size - 1 < BITS_PER_WORD
3683       && (mul_highpart_cost (speed, mode)
3684           + 2 * shift_cost (speed, mode, size-1)
3685           + 4 * add_cost (speed, mode) < max_cost))
3686     {
3687       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3688       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3689                           unsignedp, OPTAB_DIRECT);
3690       if (tem)
3691         /* We used the wrong signedness.  Adjust the result.  */
3692         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3693                                             tem, unsignedp);
3694     }
3695
3696   /* Try widening multiplication.  */
3697   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3698   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3699       && mul_widen_cost (speed, wider_mode) < max_cost)
3700     {
3701       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3702                           unsignedp, OPTAB_WIDEN);
3703       if (tem)
3704         return extract_high_half (mode, tem);
3705     }
3706
3707   /* Try widening the mode and perform a non-widening multiplication.  */
3708   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3709       && size - 1 < BITS_PER_WORD
3710       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3711           < max_cost))
3712     {
3713       rtx_insn *insns;
3714       rtx wop0, wop1;
3715
3716       /* We need to widen the operands, for example to ensure the
3717          constant multiplier is correctly sign or zero extended.
3718          Use a sequence to clean-up any instructions emitted by
3719          the conversions if things don't work out.  */
3720       start_sequence ();
3721       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3722       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3723       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3724                           unsignedp, OPTAB_WIDEN);
3725       insns = get_insns ();
3726       end_sequence ();
3727
3728       if (tem)
3729         {
3730           emit_insn (insns);
3731           return extract_high_half (mode, tem);
3732         }
3733     }
3734
3735   /* Try widening multiplication of opposite signedness, and adjust.  */
3736   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3737   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3738       && size - 1 < BITS_PER_WORD
3739       && (mul_widen_cost (speed, wider_mode)
3740           + 2 * shift_cost (speed, mode, size-1)
3741           + 4 * add_cost (speed, mode) < max_cost))
3742     {
3743       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3744                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3745       if (tem != 0)
3746         {
3747           tem = extract_high_half (mode, tem);
3748           /* We used the wrong signedness.  Adjust the result.  */
3749           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3750                                               target, unsignedp);
3751         }
3752     }
3753
3754   return 0;
3755 }
3756
3757 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3758    putting the high half of the result in TARGET if that is convenient,
3759    and return where the result is.  If the operation can not be performed,
3760    0 is returned.
3761
3762    MODE is the mode of operation and result.
3763
3764    UNSIGNEDP nonzero means unsigned multiply.
3765
3766    MAX_COST is the total allowed cost for the expanded RTL.  */
3767
3768 static rtx
3769 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3770                       rtx target, int unsignedp, int max_cost)
3771 {
3772   machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3773   unsigned HOST_WIDE_INT cnst1;
3774   int extra_cost;
3775   bool sign_adjust = false;
3776   enum mult_variant variant;
3777   struct algorithm alg;
3778   rtx tem;
3779   bool speed = optimize_insn_for_speed_p ();
3780
3781   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3782   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3783   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3784
3785   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3786
3787   /* We can't optimize modes wider than BITS_PER_WORD.
3788      ??? We might be able to perform double-word arithmetic if
3789      mode == word_mode, however all the cost calculations in
3790      synth_mult etc. assume single-word operations.  */
3791   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3792     return expmed_mult_highpart_optab (mode, op0, op1, target,
3793                                        unsignedp, max_cost);
3794
3795   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3796
3797   /* Check whether we try to multiply by a negative constant.  */
3798   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3799     {
3800       sign_adjust = true;
3801       extra_cost += add_cost (speed, mode);
3802     }
3803
3804   /* See whether shift/add multiplication is cheap enough.  */
3805   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3806                            max_cost - extra_cost))
3807     {
3808       /* See whether the specialized multiplication optabs are
3809          cheaper than the shift/add version.  */
3810       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3811                                         alg.cost.cost + extra_cost);
3812       if (tem)
3813         return tem;
3814
3815       tem = convert_to_mode (wider_mode, op0, unsignedp);
3816       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3817       tem = extract_high_half (mode, tem);
3818
3819       /* Adjust result for signedness.  */
3820       if (sign_adjust)
3821         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3822
3823       return tem;
3824     }
3825   return expmed_mult_highpart_optab (mode, op0, op1, target,
3826                                      unsignedp, max_cost);
3827 }
3828
3829
3830 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3831
3832 static rtx
3833 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3834 {
3835   rtx result, temp, shift;
3836   rtx_code_label *label;
3837   int logd;
3838   int prec = GET_MODE_PRECISION (mode);
3839
3840   logd = floor_log2 (d);
3841   result = gen_reg_rtx (mode);
3842
3843   /* Avoid conditional branches when they're expensive.  */
3844   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3845       && optimize_insn_for_speed_p ())
3846     {
3847       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3848                                       mode, 0, -1);
3849       if (signmask)
3850         {
3851           HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3852           signmask = force_reg (mode, signmask);
3853           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3854
3855           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3856              which instruction sequence to use.  If logical right shifts
3857              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3858              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3859
3860           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3861           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3862               || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3863                   > COSTS_N_INSNS (2)))
3864             {
3865               temp = expand_binop (mode, xor_optab, op0, signmask,
3866                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3867               temp = expand_binop (mode, sub_optab, temp, signmask,
3868                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3869               temp = expand_binop (mode, and_optab, temp,
3870                                    gen_int_mode (masklow, mode),
3871                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3872               temp = expand_binop (mode, xor_optab, temp, signmask,
3873                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3874               temp = expand_binop (mode, sub_optab, temp, signmask,
3875                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3876             }
3877           else
3878             {
3879               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3880                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3881               signmask = force_reg (mode, signmask);
3882
3883               temp = expand_binop (mode, add_optab, op0, signmask,
3884                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3885               temp = expand_binop (mode, and_optab, temp,
3886                                    gen_int_mode (masklow, mode),
3887                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3888               temp = expand_binop (mode, sub_optab, temp, signmask,
3889                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3890             }
3891           return temp;
3892         }
3893     }
3894
3895   /* Mask contains the mode's signbit and the significant bits of the
3896      modulus.  By including the signbit in the operation, many targets
3897      can avoid an explicit compare operation in the following comparison
3898      against zero.  */
3899   wide_int mask = wi::mask (logd, false, prec);
3900   mask = wi::set_bit (mask, prec - 1);
3901
3902   temp = expand_binop (mode, and_optab, op0,
3903                        immed_wide_int_const (mask, mode),
3904                        result, 1, OPTAB_LIB_WIDEN);
3905   if (temp != result)
3906     emit_move_insn (result, temp);
3907
3908   label = gen_label_rtx ();
3909   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3910
3911   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3912                        0, OPTAB_LIB_WIDEN);
3913
3914   mask = wi::mask (logd, true, prec);
3915   temp = expand_binop (mode, ior_optab, temp,
3916                        immed_wide_int_const (mask, mode),
3917                        result, 1, OPTAB_LIB_WIDEN);
3918   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3919                        0, OPTAB_LIB_WIDEN);
3920   if (temp != result)
3921     emit_move_insn (result, temp);
3922   emit_label (label);
3923   return result;
3924 }
3925
3926 /* Expand signed division of OP0 by a power of two D in mode MODE.
3927    This routine is only called for positive values of D.  */
3928
3929 static rtx
3930 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3931 {
3932   rtx temp;
3933   rtx_code_label *label;
3934   int logd;
3935
3936   logd = floor_log2 (d);
3937
3938   if (d == 2
3939       && BRANCH_COST (optimize_insn_for_speed_p (),
3940                       false) >= 1)
3941     {
3942       temp = gen_reg_rtx (mode);
3943       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3944       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3945                            0, OPTAB_LIB_WIDEN);
3946       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3947     }
3948
3949   if (HAVE_conditional_move
3950       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3951     {
3952       rtx temp2;
3953
3954       start_sequence ();
3955       temp2 = copy_to_mode_reg (mode, op0);
3956       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3957                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3958       temp = force_reg (mode, temp);
3959
3960       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3961       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3962                                      mode, temp, temp2, mode, 0);
3963       if (temp2)
3964         {
3965           rtx_insn *seq = get_insns ();
3966           end_sequence ();
3967           emit_insn (seq);
3968           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3969         }
3970       end_sequence ();
3971     }
3972
3973   if (BRANCH_COST (optimize_insn_for_speed_p (),
3974                    false) >= 2)
3975     {
3976       int ushift = GET_MODE_BITSIZE (mode) - logd;
3977
3978       temp = gen_reg_rtx (mode);
3979       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3980       if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3981           || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3982              > COSTS_N_INSNS (1))
3983         temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3984                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3985       else
3986         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3987                              ushift, NULL_RTX, 1);
3988       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3989                            0, OPTAB_LIB_WIDEN);
3990       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3991     }
3992
3993   label = gen_label_rtx ();
3994   temp = copy_to_mode_reg (mode, op0);
3995   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3996   expand_inc (temp, gen_int_mode (d - 1, mode));
3997   emit_label (label);
3998   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3999 }
4000 \f
4001 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4002    if that is convenient, and returning where the result is.
4003    You may request either the quotient or the remainder as the result;
4004    specify REM_FLAG nonzero to get the remainder.
4005
4006    CODE is the expression code for which kind of division this is;
4007    it controls how rounding is done.  MODE is the machine mode to use.
4008    UNSIGNEDP nonzero means do unsigned division.  */
4009
4010 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4011    and then correct it by or'ing in missing high bits
4012    if result of ANDI is nonzero.
4013    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4014    This could optimize to a bfexts instruction.
4015    But C doesn't use these operations, so their optimizations are
4016    left for later.  */
4017 /* ??? For modulo, we don't actually need the highpart of the first product,
4018    the low part will do nicely.  And for small divisors, the second multiply
4019    can also be a low-part only multiply or even be completely left out.
4020    E.g. to calculate the remainder of a division by 3 with a 32 bit
4021    multiply, multiply with 0x55555556 and extract the upper two bits;
4022    the result is exact for inputs up to 0x1fffffff.
4023    The input range can be reduced by using cross-sum rules.
4024    For odd divisors >= 3, the following table gives right shift counts
4025    so that if a number is shifted by an integer multiple of the given
4026    amount, the remainder stays the same:
4027    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4028    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4029    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4030    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4031    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4032
4033    Cross-sum rules for even numbers can be derived by leaving as many bits
4034    to the right alone as the divisor has zeros to the right.
4035    E.g. if x is an unsigned 32 bit number:
4036    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4037    */
4038
4039 rtx
4040 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4041                rtx op0, rtx op1, rtx target, int unsignedp)
4042 {
4043   machine_mode compute_mode;
4044   rtx tquotient;
4045   rtx quotient = 0, remainder = 0;
4046   rtx_insn *last;
4047   int size;
4048   rtx_insn *insn;
4049   optab optab1, optab2;
4050   int op1_is_constant, op1_is_pow2 = 0;
4051   int max_cost, extra_cost;
4052   static HOST_WIDE_INT last_div_const = 0;
4053   bool speed = optimize_insn_for_speed_p ();
4054
4055   op1_is_constant = CONST_INT_P (op1);
4056   if (op1_is_constant)
4057     {
4058       wide_int ext_op1 = rtx_mode_t (op1, mode);
4059       op1_is_pow2 = (wi::popcount (ext_op1) == 1
4060                      || (! unsignedp
4061                          && wi::popcount (wi::neg (ext_op1)) == 1));
4062     }
4063
4064   /*
4065      This is the structure of expand_divmod:
4066
4067      First comes code to fix up the operands so we can perform the operations
4068      correctly and efficiently.
4069
4070      Second comes a switch statement with code specific for each rounding mode.
4071      For some special operands this code emits all RTL for the desired
4072      operation, for other cases, it generates only a quotient and stores it in
4073      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4074      to indicate that it has not done anything.
4075
4076      Last comes code that finishes the operation.  If QUOTIENT is set and
4077      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4078      QUOTIENT is not set, it is computed using trunc rounding.
4079
4080      We try to generate special code for division and remainder when OP1 is a
4081      constant.  If |OP1| = 2**n we can use shifts and some other fast
4082      operations.  For other values of OP1, we compute a carefully selected
4083      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4084      by m.
4085
4086      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4087      half of the product.  Different strategies for generating the product are
4088      implemented in expmed_mult_highpart.
4089
4090      If what we actually want is the remainder, we generate that by another
4091      by-constant multiplication and a subtraction.  */
4092
4093   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4094      code below will malfunction if we are, so check here and handle
4095      the special case if so.  */
4096   if (op1 == const1_rtx)
4097     return rem_flag ? const0_rtx : op0;
4098
4099     /* When dividing by -1, we could get an overflow.
4100      negv_optab can handle overflows.  */
4101   if (! unsignedp && op1 == constm1_rtx)
4102     {
4103       if (rem_flag)
4104         return const0_rtx;
4105       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4106                           ? negv_optab : neg_optab, op0, target, 0);
4107     }
4108
4109   if (target
4110       /* Don't use the function value register as a target
4111          since we have to read it as well as write it,
4112          and function-inlining gets confused by this.  */
4113       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4114           /* Don't clobber an operand while doing a multi-step calculation.  */
4115           || ((rem_flag || op1_is_constant)
4116               && (reg_mentioned_p (target, op0)
4117                   || (MEM_P (op0) && MEM_P (target))))
4118           || reg_mentioned_p (target, op1)
4119           || (MEM_P (op1) && MEM_P (target))))
4120     target = 0;
4121
4122   /* Get the mode in which to perform this computation.  Normally it will
4123      be MODE, but sometimes we can't do the desired operation in MODE.
4124      If so, pick a wider mode in which we can do the operation.  Convert
4125      to that mode at the start to avoid repeated conversions.
4126
4127      First see what operations we need.  These depend on the expression
4128      we are evaluating.  (We assume that divxx3 insns exist under the
4129      same conditions that modxx3 insns and that these insns don't normally
4130      fail.  If these assumptions are not correct, we may generate less
4131      efficient code in some cases.)
4132
4133      Then see if we find a mode in which we can open-code that operation
4134      (either a division, modulus, or shift).  Finally, check for the smallest
4135      mode for which we can do the operation with a library call.  */
4136
4137   /* We might want to refine this now that we have division-by-constant
4138      optimization.  Since expmed_mult_highpart tries so many variants, it is
4139      not straightforward to generalize this.  Maybe we should make an array
4140      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4141
4142   optab1 = (op1_is_pow2
4143             ? (unsignedp ? lshr_optab : ashr_optab)
4144             : (unsignedp ? udiv_optab : sdiv_optab));
4145   optab2 = (op1_is_pow2 ? optab1
4146             : (unsignedp ? udivmod_optab : sdivmod_optab));
4147
4148   FOR_EACH_MODE_FROM (compute_mode, mode)
4149     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4150         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4151       break;
4152
4153   if (compute_mode == VOIDmode)
4154     FOR_EACH_MODE_FROM (compute_mode, mode)
4155       if (optab_libfunc (optab1, compute_mode)
4156           || optab_libfunc (optab2, compute_mode))
4157         break;
4158
4159   /* If we still couldn't find a mode, use MODE, but expand_binop will
4160      probably die.  */
4161   if (compute_mode == VOIDmode)
4162     compute_mode = mode;
4163
4164   if (target && GET_MODE (target) == compute_mode)
4165     tquotient = target;
4166   else
4167     tquotient = gen_reg_rtx (compute_mode);
4168
4169   size = GET_MODE_BITSIZE (compute_mode);
4170 #if 0
4171   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4172      (mode), and thereby get better code when OP1 is a constant.  Do that
4173      later.  It will require going over all usages of SIZE below.  */
4174   size = GET_MODE_BITSIZE (mode);
4175 #endif
4176
4177   /* Only deduct something for a REM if the last divide done was
4178      for a different constant.   Then set the constant of the last
4179      divide.  */
4180   max_cost = (unsignedp
4181               ? udiv_cost (speed, compute_mode)
4182               : sdiv_cost (speed, compute_mode));
4183   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4184                      && INTVAL (op1) == last_div_const))
4185     max_cost -= (mul_cost (speed, compute_mode)
4186                  + add_cost (speed, compute_mode));
4187
4188   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4189
4190   /* Now convert to the best mode to use.  */
4191   if (compute_mode != mode)
4192     {
4193       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4194       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4195
4196       /* convert_modes may have placed op1 into a register, so we
4197          must recompute the following.  */
4198       op1_is_constant = CONST_INT_P (op1);
4199       if (op1_is_constant)
4200         {
4201           wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4202           op1_is_pow2 = (wi::popcount (ext_op1) == 1
4203                          || (! unsignedp
4204                              && wi::popcount (wi::neg (ext_op1)) == 1));
4205         }
4206       else
4207         op1_is_pow2 = 0;
4208     }
4209
4210   /* If one of the operands is a volatile MEM, copy it into a register.  */
4211
4212   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4213     op0 = force_reg (compute_mode, op0);
4214   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4215     op1 = force_reg (compute_mode, op1);
4216
4217   /* If we need the remainder or if OP1 is constant, we need to
4218      put OP0 in a register in case it has any queued subexpressions.  */
4219   if (rem_flag || op1_is_constant)
4220     op0 = force_reg (compute_mode, op0);
4221
4222   last = get_last_insn ();
4223
4224   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4225   if (unsignedp)
4226     {
4227       if (code == FLOOR_DIV_EXPR)
4228         code = TRUNC_DIV_EXPR;
4229       if (code == FLOOR_MOD_EXPR)
4230         code = TRUNC_MOD_EXPR;
4231       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4232         code = TRUNC_DIV_EXPR;
4233     }
4234
4235   if (op1 != const0_rtx)
4236     switch (code)
4237       {
4238       case TRUNC_MOD_EXPR:
4239       case TRUNC_DIV_EXPR:
4240         if (op1_is_constant)
4241           {
4242             if (unsignedp)
4243               {
4244                 unsigned HOST_WIDE_INT mh, ml;
4245                 int pre_shift, post_shift;
4246                 int dummy;
4247                 wide_int wd = rtx_mode_t (op1, compute_mode);
4248                 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4249
4250                 if (wi::popcount (wd) == 1)
4251                   {
4252                     pre_shift = floor_log2 (d);
4253                     if (rem_flag)
4254                       {
4255                         unsigned HOST_WIDE_INT mask
4256                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4257                         remainder
4258                           = expand_binop (compute_mode, and_optab, op0,
4259                                           gen_int_mode (mask, compute_mode),
4260                                           remainder, 1,
4261                                           OPTAB_LIB_WIDEN);
4262                         if (remainder)
4263                           return gen_lowpart (mode, remainder);
4264                       }
4265                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4266                                              pre_shift, tquotient, 1);
4267                   }
4268                 else if (size <= HOST_BITS_PER_WIDE_INT)
4269                   {
4270                     if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4271                       {
4272                         /* Most significant bit of divisor is set; emit an scc
4273                            insn.  */
4274                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4275                                                           compute_mode, 1, 1);
4276                       }
4277                     else
4278                       {
4279                         /* Find a suitable multiplier and right shift count
4280                            instead of multiplying with D.  */
4281
4282                         mh = choose_multiplier (d, size, size,
4283                                                 &ml, &post_shift, &dummy);
4284
4285                         /* If the suggested multiplier is more than SIZE bits,
4286                            we can do better for even divisors, using an
4287                            initial right shift.  */
4288                         if (mh != 0 && (d & 1) == 0)
4289                           {
4290                             pre_shift = ctz_or_zero (d);
4291                             mh = choose_multiplier (d >> pre_shift, size,
4292                                                     size - pre_shift,
4293                                                     &ml, &post_shift, &dummy);
4294                             gcc_assert (!mh);
4295                           }
4296                         else
4297                           pre_shift = 0;
4298
4299                         if (mh != 0)
4300                           {
4301                             rtx t1, t2, t3, t4;
4302
4303                             if (post_shift - 1 >= BITS_PER_WORD)
4304                               goto fail1;
4305
4306                             extra_cost
4307                               = (shift_cost (speed, compute_mode, post_shift - 1)
4308                                  + shift_cost (speed, compute_mode, 1)
4309                                  + 2 * add_cost (speed, compute_mode));
4310                             t1 = expmed_mult_highpart
4311                               (compute_mode, op0,
4312                                gen_int_mode (ml, compute_mode),
4313                                NULL_RTX, 1, max_cost - extra_cost);
4314                             if (t1 == 0)
4315                               goto fail1;
4316                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4317                                                                op0, t1),
4318                                                 NULL_RTX);
4319                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4320                                                t2, 1, NULL_RTX, 1);
4321                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4322                                                               t1, t3),
4323                                                 NULL_RTX);
4324                             quotient = expand_shift
4325                               (RSHIFT_EXPR, compute_mode, t4,
4326                                post_shift - 1, tquotient, 1);
4327                           }
4328                         else
4329                           {
4330                             rtx t1, t2;
4331
4332                             if (pre_shift >= BITS_PER_WORD
4333                                 || post_shift >= BITS_PER_WORD)
4334                               goto fail1;
4335
4336                             t1 = expand_shift
4337                               (RSHIFT_EXPR, compute_mode, op0,
4338                                pre_shift, NULL_RTX, 1);
4339                             extra_cost
4340                               = (shift_cost (speed, compute_mode, pre_shift)
4341                                  + shift_cost (speed, compute_mode, post_shift));
4342                             t2 = expmed_mult_highpart
4343                               (compute_mode, t1,
4344                                gen_int_mode (ml, compute_mode),
4345                                NULL_RTX, 1, max_cost - extra_cost);
4346                             if (t2 == 0)
4347                               goto fail1;
4348                             quotient = expand_shift
4349                               (RSHIFT_EXPR, compute_mode, t2,
4350                                post_shift, tquotient, 1);
4351                           }
4352                       }
4353                   }
4354                 else            /* Too wide mode to use tricky code */
4355                   break;
4356
4357                 insn = get_last_insn ();
4358                 if (insn != last)
4359                   set_dst_reg_note (insn, REG_EQUAL,
4360                                     gen_rtx_UDIV (compute_mode, op0, op1),
4361                                     quotient);
4362               }
4363             else                /* TRUNC_DIV, signed */
4364               {
4365                 unsigned HOST_WIDE_INT ml;
4366                 int lgup, post_shift;
4367                 rtx mlr;
4368                 HOST_WIDE_INT d = INTVAL (op1);
4369                 unsigned HOST_WIDE_INT abs_d;
4370
4371                 /* Since d might be INT_MIN, we have to cast to
4372                    unsigned HOST_WIDE_INT before negating to avoid
4373                    undefined signed overflow.  */
4374                 abs_d = (d >= 0
4375                          ? (unsigned HOST_WIDE_INT) d
4376                          : - (unsigned HOST_WIDE_INT) d);
4377
4378                 /* n rem d = n rem -d */
4379                 if (rem_flag && d < 0)
4380                   {
4381                     d = abs_d;
4382                     op1 = gen_int_mode (abs_d, compute_mode);
4383                   }
4384
4385                 if (d == 1)
4386                   quotient = op0;
4387                 else if (d == -1)
4388                   quotient = expand_unop (compute_mode, neg_optab, op0,
4389                                           tquotient, 0);
4390                 else if (size <= HOST_BITS_PER_WIDE_INT
4391                          && abs_d == HOST_WIDE_INT_1U << (size - 1))
4392                   {
4393                     /* This case is not handled correctly below.  */
4394                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4395                                                 compute_mode, 1, 1);
4396                     if (quotient == 0)
4397                       goto fail1;
4398                   }
4399                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4400                          && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4401                          && (rem_flag
4402                              ? smod_pow2_cheap (speed, compute_mode)
4403                              : sdiv_pow2_cheap (speed, compute_mode))
4404                          /* We assume that cheap metric is true if the
4405                             optab has an expander for this mode.  */
4406                          && ((optab_handler ((rem_flag ? smod_optab
4407                                               : sdiv_optab),
4408                                              compute_mode)
4409                               != CODE_FOR_nothing)
4410                              || (optab_handler (sdivmod_optab,
4411                                                 compute_mode)
4412                                  != CODE_FOR_nothing)))
4413                   ;
4414                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)
4415                          && (size <= HOST_BITS_PER_WIDE_INT
4416                              || abs_d != (unsigned HOST_WIDE_INT) d))
4417                   {
4418                     if (rem_flag)
4419                       {
4420                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4421                         if (remainder)
4422                           return gen_lowpart (mode, remainder);
4423                       }
4424
4425                     if (sdiv_pow2_cheap (speed, compute_mode)
4426                         && ((optab_handler (sdiv_optab, compute_mode)
4427                              != CODE_FOR_nothing)
4428                             || (optab_handler (sdivmod_optab, compute_mode)
4429                                 != CODE_FOR_nothing)))
4430                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4431                                                 compute_mode, op0,
4432                                                 gen_int_mode (abs_d,
4433                                                               compute_mode),
4434                                                 NULL_RTX, 0);
4435                     else
4436                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4437
4438                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4439                        negate the quotient.  */
4440                     if (d < 0)
4441                       {
4442                         insn = get_last_insn ();
4443                         if (insn != last
4444                             && abs_d < (HOST_WIDE_INT_1U
4445                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4446                           set_dst_reg_note (insn, REG_EQUAL,
4447                                             gen_rtx_DIV (compute_mode, op0,
4448                                                          gen_int_mode
4449                                                            (abs_d,
4450                                                             compute_mode)),
4451                                             quotient);
4452
4453                         quotient = expand_unop (compute_mode, neg_optab,
4454                                                 quotient, quotient, 0);
4455                       }
4456                   }
4457                 else if (size <= HOST_BITS_PER_WIDE_INT)
4458                   {
4459                     choose_multiplier (abs_d, size, size - 1,
4460                                        &ml, &post_shift, &lgup);
4461                     if (ml < HOST_WIDE_INT_1U << (size - 1))
4462                       {
4463                         rtx t1, t2, t3;
4464
4465                         if (post_shift >= BITS_PER_WORD
4466                             || size - 1 >= BITS_PER_WORD)
4467                           goto fail1;
4468
4469                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4470                                       + shift_cost (speed, compute_mode, size - 1)
4471                                       + add_cost (speed, compute_mode));
4472                         t1 = expmed_mult_highpart
4473                           (compute_mode, op0, gen_int_mode (ml, compute_mode),
4474                            NULL_RTX, 0, max_cost - extra_cost);
4475                         if (t1 == 0)
4476                           goto fail1;
4477                         t2 = expand_shift
4478                           (RSHIFT_EXPR, compute_mode, t1,
4479                            post_shift, NULL_RTX, 0);
4480                         t3 = expand_shift
4481                           (RSHIFT_EXPR, compute_mode, op0,
4482                            size - 1, NULL_RTX, 0);
4483                         if (d < 0)
4484                           quotient
4485                             = force_operand (gen_rtx_MINUS (compute_mode,
4486                                                             t3, t2),
4487                                              tquotient);
4488                         else
4489                           quotient
4490                             = force_operand (gen_rtx_MINUS (compute_mode,
4491                                                             t2, t3),
4492                                              tquotient);
4493                       }
4494                     else
4495                       {
4496                         rtx t1, t2, t3, t4;
4497
4498                         if (post_shift >= BITS_PER_WORD
4499                             || size - 1 >= BITS_PER_WORD)
4500                           goto fail1;
4501
4502                         ml |= HOST_WIDE_INT_M1U << (size - 1);
4503                         mlr = gen_int_mode (ml, compute_mode);
4504                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4505                                       + shift_cost (speed, compute_mode, size - 1)
4506                                       + 2 * add_cost (speed, compute_mode));
4507                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4508                                                    NULL_RTX, 0,
4509                                                    max_cost - extra_cost);
4510                         if (t1 == 0)
4511                           goto fail1;
4512                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4513                                                           t1, op0),
4514                                             NULL_RTX);
4515                         t3 = expand_shift
4516                           (RSHIFT_EXPR, compute_mode, t2,
4517                            post_shift, NULL_RTX, 0);
4518                         t4 = expand_shift
4519                           (RSHIFT_EXPR, compute_mode, op0,
4520                            size - 1, NULL_RTX, 0);
4521                         if (d < 0)
4522                           quotient
4523                             = force_operand (gen_rtx_MINUS (compute_mode,
4524                                                             t4, t3),
4525                                              tquotient);
4526                         else
4527                           quotient
4528                             = force_operand (gen_rtx_MINUS (compute_mode,
4529                                                             t3, t4),
4530                                              tquotient);
4531                       }
4532                   }
4533                 else            /* Too wide mode to use tricky code */
4534                   break;
4535
4536                 insn = get_last_insn ();
4537                 if (insn != last)
4538                   set_dst_reg_note (insn, REG_EQUAL,
4539                                     gen_rtx_DIV (compute_mode, op0, op1),
4540                                     quotient);
4541               }
4542             break;
4543           }
4544       fail1:
4545         delete_insns_since (last);
4546         break;
4547
4548       case FLOOR_DIV_EXPR:
4549       case FLOOR_MOD_EXPR:
4550       /* We will come here only for signed operations.  */
4551         if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4552           {
4553             unsigned HOST_WIDE_INT mh, ml;
4554             int pre_shift, lgup, post_shift;
4555             HOST_WIDE_INT d = INTVAL (op1);
4556
4557             if (d > 0)
4558               {
4559                 /* We could just as easily deal with negative constants here,
4560                    but it does not seem worth the trouble for GCC 2.6.  */
4561                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4562                   {
4563                     pre_shift = floor_log2 (d);
4564                     if (rem_flag)
4565                       {
4566                         unsigned HOST_WIDE_INT mask
4567                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4568                         remainder = expand_binop
4569                           (compute_mode, and_optab, op0,
4570                            gen_int_mode (mask, compute_mode),
4571                            remainder, 0, OPTAB_LIB_WIDEN);
4572                         if (remainder)
4573                           return gen_lowpart (mode, remainder);
4574                       }
4575                     quotient = expand_shift
4576                       (RSHIFT_EXPR, compute_mode, op0,
4577                        pre_shift, tquotient, 0);
4578                   }
4579                 else
4580                   {
4581                     rtx t1, t2, t3, t4;
4582
4583                     mh = choose_multiplier (d, size, size - 1,
4584                                             &ml, &post_shift, &lgup);
4585                     gcc_assert (!mh);
4586
4587                     if (post_shift < BITS_PER_WORD
4588                         && size - 1 < BITS_PER_WORD)
4589                       {
4590                         t1 = expand_shift
4591                           (RSHIFT_EXPR, compute_mode, op0,
4592                            size - 1, NULL_RTX, 0);
4593                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4594                                            NULL_RTX, 0, OPTAB_WIDEN);
4595                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4596                                       + shift_cost (speed, compute_mode, size - 1)
4597                                       + 2 * add_cost (speed, compute_mode));
4598                         t3 = expmed_mult_highpart
4599                           (compute_mode, t2, gen_int_mode (ml, compute_mode),
4600                            NULL_RTX, 1, max_cost - extra_cost);
4601                         if (t3 != 0)
4602                           {
4603                             t4 = expand_shift
4604                               (RSHIFT_EXPR, compute_mode, t3,
4605                                post_shift, NULL_RTX, 1);
4606                             quotient = expand_binop (compute_mode, xor_optab,
4607                                                      t4, t1, tquotient, 0,
4608                                                      OPTAB_WIDEN);
4609                           }
4610                       }
4611                   }
4612               }
4613             else
4614               {
4615                 rtx nsign, t1, t2, t3, t4;
4616                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4617                                                   op0, constm1_rtx), NULL_RTX);
4618                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4619                                    0, OPTAB_WIDEN);
4620                 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
4621                                       size - 1, NULL_RTX, 0);
4622                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4623                                     NULL_RTX);
4624                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4625                                     NULL_RTX, 0);
4626                 if (t4)
4627                   {
4628                     rtx t5;
4629                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4630                                       NULL_RTX, 0);
4631                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4632                                                             t4, t5),
4633                                               tquotient);
4634                   }
4635               }
4636           }
4637
4638         if (quotient != 0)
4639           break;
4640         delete_insns_since (last);
4641
4642         /* Try using an instruction that produces both the quotient and
4643            remainder, using truncation.  We can easily compensate the quotient
4644            or remainder to get floor rounding, once we have the remainder.
4645            Notice that we compute also the final remainder value here,
4646            and return the result right away.  */
4647         if (target == 0 || GET_MODE (target) != compute_mode)
4648           target = gen_reg_rtx (compute_mode);
4649
4650         if (rem_flag)
4651           {
4652             remainder
4653               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4654             quotient = gen_reg_rtx (compute_mode);
4655           }
4656         else
4657           {
4658             quotient
4659               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4660             remainder = gen_reg_rtx (compute_mode);
4661           }
4662
4663         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4664                                  quotient, remainder, 0))
4665           {
4666             /* This could be computed with a branch-less sequence.
4667                Save that for later.  */
4668             rtx tem;
4669             rtx_code_label *label = gen_label_rtx ();
4670             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4671             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4672                                 NULL_RTX, 0, OPTAB_WIDEN);
4673             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4674             expand_dec (quotient, const1_rtx);
4675             expand_inc (remainder, op1);
4676             emit_label (label);
4677             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4678           }
4679
4680         /* No luck with division elimination or divmod.  Have to do it
4681            by conditionally adjusting op0 *and* the result.  */
4682         {
4683           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4684           rtx adjusted_op0;
4685           rtx tem;
4686
4687           quotient = gen_reg_rtx (compute_mode);
4688           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4689           label1 = gen_label_rtx ();
4690           label2 = gen_label_rtx ();
4691           label3 = gen_label_rtx ();
4692           label4 = gen_label_rtx ();
4693           label5 = gen_label_rtx ();
4694           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4695           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4696           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4697                               quotient, 0, OPTAB_LIB_WIDEN);
4698           if (tem != quotient)
4699             emit_move_insn (quotient, tem);
4700           emit_jump_insn (targetm.gen_jump (label5));
4701           emit_barrier ();
4702           emit_label (label1);
4703           expand_inc (adjusted_op0, const1_rtx);
4704           emit_jump_insn (targetm.gen_jump (label4));
4705           emit_barrier ();
4706           emit_label (label2);
4707           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4708           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4709                               quotient, 0, OPTAB_LIB_WIDEN);
4710           if (tem != quotient)
4711             emit_move_insn (quotient, tem);
4712           emit_jump_insn (targetm.gen_jump (label5));
4713           emit_barrier ();
4714           emit_label (label3);
4715           expand_dec (adjusted_op0, const1_rtx);
4716           emit_label (label4);
4717           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4718                               quotient, 0, OPTAB_LIB_WIDEN);
4719           if (tem != quotient)
4720             emit_move_insn (quotient, tem);
4721           expand_dec (quotient, const1_rtx);
4722           emit_label (label5);
4723         }
4724         break;
4725
4726       case CEIL_DIV_EXPR:
4727       case CEIL_MOD_EXPR:
4728         if (unsignedp)
4729           {
4730             if (op1_is_constant
4731                 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4732                 && (size <= HOST_BITS_PER_WIDE_INT
4733                     || INTVAL (op1) >= 0))
4734               {
4735                 rtx t1, t2, t3;
4736                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4737                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4738                                    floor_log2 (d), tquotient, 1);
4739                 t2 = expand_binop (compute_mode, and_optab, op0,
4740                                    gen_int_mode (d - 1, compute_mode),
4741                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4742                 t3 = gen_reg_rtx (compute_mode);
4743                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4744                                       compute_mode, 1, 1);
4745                 if (t3 == 0)
4746                   {
4747                     rtx_code_label *lab;
4748                     lab = gen_label_rtx ();
4749                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4750                     expand_inc (t1, const1_rtx);
4751                     emit_label (lab);
4752                     quotient = t1;
4753                   }
4754                 else
4755                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4756                                                           t1, t3),
4757                                             tquotient);
4758                 break;
4759               }
4760
4761             /* Try using an instruction that produces both the quotient and
4762                remainder, using truncation.  We can easily compensate the
4763                quotient or remainder to get ceiling rounding, once we have the
4764                remainder.  Notice that we compute also the final remainder
4765                value here, and return the result right away.  */
4766             if (target == 0 || GET_MODE (target) != compute_mode)
4767               target = gen_reg_rtx (compute_mode);
4768
4769             if (rem_flag)
4770               {
4771                 remainder = (REG_P (target)
4772                              ? target : gen_reg_rtx (compute_mode));
4773                 quotient = gen_reg_rtx (compute_mode);
4774               }
4775             else
4776               {
4777                 quotient = (REG_P (target)
4778                             ? target : gen_reg_rtx (compute_mode));
4779                 remainder = gen_reg_rtx (compute_mode);
4780               }
4781
4782             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4783                                      remainder, 1))
4784               {
4785                 /* This could be computed with a branch-less sequence.
4786                    Save that for later.  */
4787                 rtx_code_label *label = gen_label_rtx ();
4788                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4789                                  compute_mode, label);
4790                 expand_inc (quotient, const1_rtx);
4791                 expand_dec (remainder, op1);
4792                 emit_label (label);
4793                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4794               }
4795
4796             /* No luck with division elimination or divmod.  Have to do it
4797                by conditionally adjusting op0 *and* the result.  */
4798             {
4799               rtx_code_label *label1, *label2;
4800               rtx adjusted_op0, tem;
4801
4802               quotient = gen_reg_rtx (compute_mode);
4803               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4804               label1 = gen_label_rtx ();
4805               label2 = gen_label_rtx ();
4806               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4807                                compute_mode, label1);
4808               emit_move_insn  (quotient, const0_rtx);
4809               emit_jump_insn (targetm.gen_jump (label2));
4810               emit_barrier ();
4811               emit_label (label1);
4812               expand_dec (adjusted_op0, const1_rtx);
4813               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4814                                   quotient, 1, OPTAB_LIB_WIDEN);
4815               if (tem != quotient)
4816                 emit_move_insn (quotient, tem);
4817               expand_inc (quotient, const1_rtx);
4818               emit_label (label2);
4819             }
4820           }
4821         else /* signed */
4822           {
4823             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4824                 && INTVAL (op1) >= 0)
4825               {
4826                 /* This is extremely similar to the code for the unsigned case
4827                    above.  For 2.7 we should merge these variants, but for
4828                    2.6.1 I don't want to touch the code for unsigned since that
4829                    get used in C.  The signed case will only be used by other
4830                    languages (Ada).  */
4831
4832                 rtx t1, t2, t3;
4833                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4834                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4835                                    floor_log2 (d), tquotient, 0);
4836                 t2 = expand_binop (compute_mode, and_optab, op0,
4837                                    gen_int_mode (d - 1, compute_mode),
4838                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4839                 t3 = gen_reg_rtx (compute_mode);
4840                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4841                                       compute_mode, 1, 1);
4842                 if (t3 == 0)
4843                   {
4844                     rtx_code_label *lab;
4845                     lab = gen_label_rtx ();
4846                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4847                     expand_inc (t1, const1_rtx);
4848                     emit_label (lab);
4849                     quotient = t1;
4850                   }
4851                 else
4852                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4853                                                           t1, t3),
4854                                             tquotient);
4855                 break;
4856               }
4857
4858             /* Try using an instruction that produces both the quotient and
4859                remainder, using truncation.  We can easily compensate the
4860                quotient or remainder to get ceiling rounding, once we have the
4861                remainder.  Notice that we compute also the final remainder
4862                value here, and return the result right away.  */
4863             if (target == 0 || GET_MODE (target) != compute_mode)
4864               target = gen_reg_rtx (compute_mode);
4865             if (rem_flag)
4866               {
4867                 remainder= (REG_P (target)
4868                             ? target : gen_reg_rtx (compute_mode));
4869                 quotient = gen_reg_rtx (compute_mode);
4870               }
4871             else
4872               {
4873                 quotient = (REG_P (target)
4874                             ? target : gen_reg_rtx (compute_mode));
4875                 remainder = gen_reg_rtx (compute_mode);
4876               }
4877
4878             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4879                                      remainder, 0))
4880               {
4881                 /* This could be computed with a branch-less sequence.
4882                    Save that for later.  */
4883                 rtx tem;
4884                 rtx_code_label *label = gen_label_rtx ();
4885                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4886                                  compute_mode, label);
4887                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4888                                     NULL_RTX, 0, OPTAB_WIDEN);
4889                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4890                 expand_inc (quotient, const1_rtx);
4891                 expand_dec (remainder, op1);
4892                 emit_label (label);
4893                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4894               }
4895
4896             /* No luck with division elimination or divmod.  Have to do it
4897                by conditionally adjusting op0 *and* the result.  */
4898             {
4899               rtx_code_label *label1, *label2, *label3, *label4, *label5;
4900               rtx adjusted_op0;
4901               rtx tem;
4902
4903               quotient = gen_reg_rtx (compute_mode);
4904               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4905               label1 = gen_label_rtx ();
4906               label2 = gen_label_rtx ();
4907               label3 = gen_label_rtx ();
4908               label4 = gen_label_rtx ();
4909               label5 = gen_label_rtx ();
4910               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4911               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4912                                compute_mode, label1);
4913               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4914                                   quotient, 0, OPTAB_LIB_WIDEN);
4915               if (tem != quotient)
4916                 emit_move_insn (quotient, tem);
4917               emit_jump_insn (targetm.gen_jump (label5));
4918               emit_barrier ();
4919               emit_label (label1);
4920               expand_dec (adjusted_op0, const1_rtx);
4921               emit_jump_insn (targetm.gen_jump (label4));
4922               emit_barrier ();
4923               emit_label (label2);
4924               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4925                                compute_mode, label3);
4926               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4927                                   quotient, 0, OPTAB_LIB_WIDEN);
4928               if (tem != quotient)
4929                 emit_move_insn (quotient, tem);
4930               emit_jump_insn (targetm.gen_jump (label5));
4931               emit_barrier ();
4932               emit_label (label3);
4933               expand_inc (adjusted_op0, const1_rtx);
4934               emit_label (label4);
4935               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4936                                   quotient, 0, OPTAB_LIB_WIDEN);
4937               if (tem != quotient)
4938                 emit_move_insn (quotient, tem);
4939               expand_inc (quotient, const1_rtx);
4940               emit_label (label5);
4941             }
4942           }
4943         break;
4944
4945       case EXACT_DIV_EXPR:
4946         if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4947           {
4948             HOST_WIDE_INT d = INTVAL (op1);
4949             unsigned HOST_WIDE_INT ml;
4950             int pre_shift;
4951             rtx t1;
4952
4953             pre_shift = ctz_or_zero (d);
4954             ml = invert_mod2n (d >> pre_shift, size);
4955             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4956                                pre_shift, NULL_RTX, unsignedp);
4957             quotient = expand_mult (compute_mode, t1,
4958                                     gen_int_mode (ml, compute_mode),
4959                                     NULL_RTX, 1);
4960
4961             insn = get_last_insn ();
4962             set_dst_reg_note (insn, REG_EQUAL,
4963                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4964                                               compute_mode, op0, op1),
4965                               quotient);
4966           }
4967         break;
4968
4969       case ROUND_DIV_EXPR:
4970       case ROUND_MOD_EXPR:
4971         if (unsignedp)
4972           {
4973             rtx tem;
4974             rtx_code_label *label;
4975             label = gen_label_rtx ();
4976             quotient = gen_reg_rtx (compute_mode);
4977             remainder = gen_reg_rtx (compute_mode);
4978             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4979               {
4980                 rtx tem;
4981                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4982                                          quotient, 1, OPTAB_LIB_WIDEN);
4983                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4984                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4985                                           remainder, 1, OPTAB_LIB_WIDEN);
4986               }
4987             tem = plus_constant (compute_mode, op1, -1);
4988             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4989             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4990             expand_inc (quotient, const1_rtx);
4991             expand_dec (remainder, op1);
4992             emit_label (label);
4993           }
4994         else
4995           {
4996             rtx abs_rem, abs_op1, tem, mask;
4997             rtx_code_label *label;
4998             label = gen_label_rtx ();
4999             quotient = gen_reg_rtx (compute_mode);
5000             remainder = gen_reg_rtx (compute_mode);
5001             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5002               {
5003                 rtx tem;
5004                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
5005                                          quotient, 0, OPTAB_LIB_WIDEN);
5006                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
5007                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
5008                                           remainder, 0, OPTAB_LIB_WIDEN);
5009               }
5010             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
5011             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
5012             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
5013                                 1, NULL_RTX, 1);
5014             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
5015             tem = expand_binop (compute_mode, xor_optab, op0, op1,
5016                                 NULL_RTX, 0, OPTAB_WIDEN);
5017             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
5018                                  size - 1, NULL_RTX, 0);
5019             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
5020                                 NULL_RTX, 0, OPTAB_WIDEN);
5021             tem = expand_binop (compute_mode, sub_optab, tem, mask,
5022                                 NULL_RTX, 0, OPTAB_WIDEN);
5023             expand_inc (quotient, tem);
5024             tem = expand_binop (compute_mode, xor_optab, mask, op1,
5025                                 NULL_RTX, 0, OPTAB_WIDEN);
5026             tem = expand_binop (compute_mode, sub_optab, tem, mask,
5027                                 NULL_RTX, 0, OPTAB_WIDEN);
5028             expand_dec (remainder, tem);
5029             emit_label (label);
5030           }
5031         return gen_lowpart (mode, rem_flag ? remainder : quotient);
5032
5033       default:
5034         gcc_unreachable ();
5035       }
5036
5037   if (quotient == 0)
5038     {
5039       if (target && GET_MODE (target) != compute_mode)
5040         target = 0;
5041
5042       if (rem_flag)
5043         {
5044           /* Try to produce the remainder without producing the quotient.
5045              If we seem to have a divmod pattern that does not require widening,
5046              don't try widening here.  We should really have a WIDEN argument
5047              to expand_twoval_binop, since what we'd really like to do here is
5048              1) try a mod insn in compute_mode
5049              2) try a divmod insn in compute_mode
5050              3) try a div insn in compute_mode and multiply-subtract to get
5051                 remainder
5052              4) try the same things with widening allowed.  */
5053           remainder
5054             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5055                                  op0, op1, target,
5056                                  unsignedp,
5057                                  ((optab_handler (optab2, compute_mode)
5058                                    != CODE_FOR_nothing)
5059                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
5060           if (remainder == 0)
5061             {
5062               /* No luck there.  Can we do remainder and divide at once
5063                  without a library call?  */
5064               remainder = gen_reg_rtx (compute_mode);
5065               if (! expand_twoval_binop ((unsignedp
5066                                           ? udivmod_optab
5067                                           : sdivmod_optab),
5068                                          op0, op1,
5069                                          NULL_RTX, remainder, unsignedp))
5070                 remainder = 0;
5071             }
5072
5073           if (remainder)
5074             return gen_lowpart (mode, remainder);
5075         }
5076
5077       /* Produce the quotient.  Try a quotient insn, but not a library call.
5078          If we have a divmod in this mode, use it in preference to widening
5079          the div (for this test we assume it will not fail). Note that optab2
5080          is set to the one of the two optabs that the call below will use.  */
5081       quotient
5082         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5083                              op0, op1, rem_flag ? NULL_RTX : target,
5084                              unsignedp,
5085                              ((optab_handler (optab2, compute_mode)
5086                                != CODE_FOR_nothing)
5087                               ? OPTAB_DIRECT : OPTAB_WIDEN));
5088
5089       if (quotient == 0)
5090         {
5091           /* No luck there.  Try a quotient-and-remainder insn,
5092              keeping the quotient alone.  */
5093           quotient = gen_reg_rtx (compute_mode);
5094           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5095                                      op0, op1,
5096                                      quotient, NULL_RTX, unsignedp))
5097             {
5098               quotient = 0;
5099               if (! rem_flag)
5100                 /* Still no luck.  If we are not computing the remainder,
5101                    use a library call for the quotient.  */
5102                 quotient = sign_expand_binop (compute_mode,
5103                                               udiv_optab, sdiv_optab,
5104                                               op0, op1, target,
5105                                               unsignedp, OPTAB_LIB_WIDEN);
5106             }
5107         }
5108     }
5109
5110   if (rem_flag)
5111     {
5112       if (target && GET_MODE (target) != compute_mode)
5113         target = 0;
5114
5115       if (quotient == 0)
5116         {
5117           /* No divide instruction either.  Use library for remainder.  */
5118           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5119                                          op0, op1, target,
5120                                          unsignedp, OPTAB_LIB_WIDEN);
5121           /* No remainder function.  Try a quotient-and-remainder
5122              function, keeping the remainder.  */
5123           if (!remainder)
5124             {
5125               remainder = gen_reg_rtx (compute_mode);
5126               if (!expand_twoval_binop_libfunc
5127                   (unsignedp ? udivmod_optab : sdivmod_optab,
5128                    op0, op1,
5129                    NULL_RTX, remainder,
5130                    unsignedp ? UMOD : MOD))
5131                 remainder = NULL_RTX;
5132             }
5133         }
5134       else
5135         {
5136           /* We divided.  Now finish doing X - Y * (X / Y).  */
5137           remainder = expand_mult (compute_mode, quotient, op1,
5138                                    NULL_RTX, unsignedp);
5139           remainder = expand_binop (compute_mode, sub_optab, op0,
5140                                     remainder, target, unsignedp,
5141                                     OPTAB_LIB_WIDEN);
5142         }
5143     }
5144
5145   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5146 }
5147 \f
5148 /* Return a tree node with data type TYPE, describing the value of X.
5149    Usually this is an VAR_DECL, if there is no obvious better choice.
5150    X may be an expression, however we only support those expressions
5151    generated by loop.c.  */
5152
5153 tree
5154 make_tree (tree type, rtx x)
5155 {
5156   tree t;
5157
5158   switch (GET_CODE (x))
5159     {
5160     case CONST_INT:
5161     case CONST_WIDE_INT:
5162       t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5163       return t;
5164
5165     case CONST_DOUBLE:
5166       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5167       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5168         t = wide_int_to_tree (type,
5169                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5170                                                     HOST_BITS_PER_WIDE_INT * 2));
5171       else
5172         t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5173
5174       return t;
5175
5176     case CONST_VECTOR:
5177       {
5178         int units = CONST_VECTOR_NUNITS (x);
5179         tree itype = TREE_TYPE (type);
5180         tree *elts;
5181         int i;
5182
5183         /* Build a tree with vector elements.  */
5184         elts = XALLOCAVEC (tree, units);
5185         for (i = units - 1; i >= 0; --i)
5186           {
5187             rtx elt = CONST_VECTOR_ELT (x, i);
5188             elts[i] = make_tree (itype, elt);
5189           }
5190
5191         return build_vector (type, elts);
5192       }
5193
5194     case PLUS:
5195       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5196                           make_tree (type, XEXP (x, 1)));
5197
5198     case MINUS:
5199       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5200                           make_tree (type, XEXP (x, 1)));
5201
5202     case NEG:
5203       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5204
5205     case MULT:
5206       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5207                           make_tree (type, XEXP (x, 1)));
5208
5209     case ASHIFT:
5210       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5211                           make_tree (type, XEXP (x, 1)));
5212
5213     case LSHIFTRT:
5214       t = unsigned_type_for (type);
5215       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5216                                          make_tree (t, XEXP (x, 0)),
5217                                          make_tree (type, XEXP (x, 1))));
5218
5219     case ASHIFTRT:
5220       t = signed_type_for (type);
5221       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5222                                          make_tree (t, XEXP (x, 0)),
5223                                          make_tree (type, XEXP (x, 1))));
5224
5225     case DIV:
5226       if (TREE_CODE (type) != REAL_TYPE)
5227         t = signed_type_for (type);
5228       else
5229         t = type;
5230
5231       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5232                                          make_tree (t, XEXP (x, 0)),
5233                                          make_tree (t, XEXP (x, 1))));
5234     case UDIV:
5235       t = unsigned_type_for (type);
5236       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5237                                          make_tree (t, XEXP (x, 0)),
5238                                          make_tree (t, XEXP (x, 1))));
5239
5240     case SIGN_EXTEND:
5241     case ZERO_EXTEND:
5242       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5243                                           GET_CODE (x) == ZERO_EXTEND);
5244       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5245
5246     case CONST:
5247       return make_tree (type, XEXP (x, 0));
5248
5249     case SYMBOL_REF:
5250       t = SYMBOL_REF_DECL (x);
5251       if (t)
5252         return fold_convert (type, build_fold_addr_expr (t));
5253       /* fall through.  */
5254
5255     default:
5256       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5257
5258       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5259          address mode to pointer mode.  */
5260       if (POINTER_TYPE_P (type))
5261         x = convert_memory_address_addr_space
5262               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5263
5264       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5265          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5266       t->decl_with_rtl.rtl = x;
5267
5268       return t;
5269     }
5270 }
5271 \f
5272 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5273    and returning TARGET.
5274
5275    If TARGET is 0, a pseudo-register or constant is returned.  */
5276
5277 rtx
5278 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5279 {
5280   rtx tem = 0;
5281
5282   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5283     tem = simplify_binary_operation (AND, mode, op0, op1);
5284   if (tem == 0)
5285     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5286
5287   if (target == 0)
5288     target = tem;
5289   else if (tem != target)
5290     emit_move_insn (target, tem);
5291   return target;
5292 }
5293
5294 /* Helper function for emit_store_flag.  */
5295 rtx
5296 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5297              machine_mode mode, machine_mode compare_mode,
5298              int unsignedp, rtx x, rtx y, int normalizep,
5299              machine_mode target_mode)
5300 {
5301   struct expand_operand ops[4];
5302   rtx op0, comparison, subtarget;
5303   rtx_insn *last;
5304   machine_mode result_mode = targetm.cstore_mode (icode);
5305
5306   last = get_last_insn ();
5307   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5308   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5309   if (!x || !y)
5310     {
5311       delete_insns_since (last);
5312       return NULL_RTX;
5313     }
5314
5315   if (target_mode == VOIDmode)
5316     target_mode = result_mode;
5317   if (!target)
5318     target = gen_reg_rtx (target_mode);
5319
5320   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5321
5322   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5323   create_fixed_operand (&ops[1], comparison);
5324   create_fixed_operand (&ops[2], x);
5325   create_fixed_operand (&ops[3], y);
5326   if (!maybe_expand_insn (icode, 4, ops))
5327     {
5328       delete_insns_since (last);
5329       return NULL_RTX;
5330     }
5331   subtarget = ops[0].value;
5332
5333   /* If we are converting to a wider mode, first convert to
5334      TARGET_MODE, then normalize.  This produces better combining
5335      opportunities on machines that have a SIGN_EXTRACT when we are
5336      testing a single bit.  This mostly benefits the 68k.
5337
5338      If STORE_FLAG_VALUE does not have the sign bit set when
5339      interpreted in MODE, we can do this conversion as unsigned, which
5340      is usually more efficient.  */
5341   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5342     {
5343       convert_move (target, subtarget,
5344                     val_signbit_known_clear_p (result_mode,
5345                                                STORE_FLAG_VALUE));
5346       op0 = target;
5347       result_mode = target_mode;
5348     }
5349   else
5350     op0 = subtarget;
5351
5352   /* If we want to keep subexpressions around, don't reuse our last
5353      target.  */
5354   if (optimize)
5355     subtarget = 0;
5356
5357   /* Now normalize to the proper value in MODE.  Sometimes we don't
5358      have to do anything.  */
5359   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5360     ;
5361   /* STORE_FLAG_VALUE might be the most negative number, so write
5362      the comparison this way to avoid a compiler-time warning.  */
5363   else if (- normalizep == STORE_FLAG_VALUE)
5364     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5365
5366   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5367      it hard to use a value of just the sign bit due to ANSI integer
5368      constant typing rules.  */
5369   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5370     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5371                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5372                         normalizep == 1);
5373   else
5374     {
5375       gcc_assert (STORE_FLAG_VALUE & 1);
5376
5377       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5378       if (normalizep == -1)
5379         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5380     }
5381
5382   /* If we were converting to a smaller mode, do the conversion now.  */
5383   if (target_mode != result_mode)
5384     {
5385       convert_move (target, op0, 0);
5386       return target;
5387     }
5388   else
5389     return op0;
5390 }
5391
5392
5393 /* A subroutine of emit_store_flag only including "tricks" that do not
5394    need a recursive call.  These are kept separate to avoid infinite
5395    loops.  */
5396
5397 static rtx
5398 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5399                    machine_mode mode, int unsignedp, int normalizep,
5400                    machine_mode target_mode)
5401 {
5402   rtx subtarget;
5403   enum insn_code icode;
5404   machine_mode compare_mode;
5405   enum mode_class mclass;
5406   enum rtx_code scode;
5407
5408   if (unsignedp)
5409     code = unsigned_condition (code);
5410   scode = swap_condition (code);
5411
5412   /* If one operand is constant, make it the second one.  Only do this
5413      if the other operand is not constant as well.  */
5414
5415   if (swap_commutative_operands_p (op0, op1))
5416     {
5417       std::swap (op0, op1);
5418       code = swap_condition (code);
5419     }
5420
5421   if (mode == VOIDmode)
5422     mode = GET_MODE (op0);
5423
5424   /* For some comparisons with 1 and -1, we can convert this to
5425      comparisons with zero.  This will often produce more opportunities for
5426      store-flag insns.  */
5427
5428   switch (code)
5429     {
5430     case LT:
5431       if (op1 == const1_rtx)
5432         op1 = const0_rtx, code = LE;
5433       break;
5434     case LE:
5435       if (op1 == constm1_rtx)
5436         op1 = const0_rtx, code = LT;
5437       break;
5438     case GE:
5439       if (op1 == const1_rtx)
5440         op1 = const0_rtx, code = GT;
5441       break;
5442     case GT:
5443       if (op1 == constm1_rtx)
5444         op1 = const0_rtx, code = GE;
5445       break;
5446     case GEU:
5447       if (op1 == const1_rtx)
5448         op1 = const0_rtx, code = NE;
5449       break;
5450     case LTU:
5451       if (op1 == const1_rtx)
5452         op1 = const0_rtx, code = EQ;
5453       break;
5454     default:
5455       break;
5456     }
5457
5458   /* If we are comparing a double-word integer with zero or -1, we can
5459      convert the comparison into one involving a single word.  */
5460   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5461       && GET_MODE_CLASS (mode) == MODE_INT
5462       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5463     {
5464       rtx tem;
5465       if ((code == EQ || code == NE)
5466           && (op1 == const0_rtx || op1 == constm1_rtx))
5467         {
5468           rtx op00, op01;
5469
5470           /* Do a logical OR or AND of the two words and compare the
5471              result.  */
5472           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5473           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5474           tem = expand_binop (word_mode,
5475                               op1 == const0_rtx ? ior_optab : and_optab,
5476                               op00, op01, NULL_RTX, unsignedp,
5477                               OPTAB_DIRECT);
5478
5479           if (tem != 0)
5480             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5481                                    unsignedp, normalizep);
5482         }
5483       else if ((code == LT || code == GE) && op1 == const0_rtx)
5484         {
5485           rtx op0h;
5486
5487           /* If testing the sign bit, can just test on high word.  */
5488           op0h = simplify_gen_subreg (word_mode, op0, mode,
5489                                       subreg_highpart_offset (word_mode,
5490                                                               mode));
5491           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5492                                  unsignedp, normalizep);
5493         }
5494       else
5495         tem = NULL_RTX;
5496
5497       if (tem)
5498         {
5499           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5500             return tem;
5501           if (!target)
5502             target = gen_reg_rtx (target_mode);
5503
5504           convert_move (target, tem,
5505                         !val_signbit_known_set_p (word_mode,
5506                                                   (normalizep ? normalizep
5507                                                    : STORE_FLAG_VALUE)));
5508           return target;
5509         }
5510     }
5511
5512   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5513      complement of A (for GE) and shifting the sign bit to the low bit.  */
5514   if (op1 == const0_rtx && (code == LT || code == GE)
5515       && GET_MODE_CLASS (mode) == MODE_INT
5516       && (normalizep || STORE_FLAG_VALUE == 1
5517           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5518     {
5519       subtarget = target;
5520
5521       if (!target)
5522         target_mode = mode;
5523
5524       /* If the result is to be wider than OP0, it is best to convert it
5525          first.  If it is to be narrower, it is *incorrect* to convert it
5526          first.  */
5527       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5528         {
5529           op0 = convert_modes (target_mode, mode, op0, 0);
5530           mode = target_mode;
5531         }
5532
5533       if (target_mode != mode)
5534         subtarget = 0;
5535
5536       if (code == GE)
5537         op0 = expand_unop (mode, one_cmpl_optab, op0,
5538                            ((STORE_FLAG_VALUE == 1 || normalizep)
5539                             ? 0 : subtarget), 0);
5540
5541       if (STORE_FLAG_VALUE == 1 || normalizep)
5542         /* If we are supposed to produce a 0/1 value, we want to do
5543            a logical shift from the sign bit to the low-order bit; for
5544            a -1/0 value, we do an arithmetic shift.  */
5545         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5546                             GET_MODE_BITSIZE (mode) - 1,
5547                             subtarget, normalizep != -1);
5548
5549       if (mode != target_mode)
5550         op0 = convert_modes (target_mode, mode, op0, 0);
5551
5552       return op0;
5553     }
5554
5555   mclass = GET_MODE_CLASS (mode);
5556   FOR_EACH_MODE_FROM (compare_mode, mode)
5557     {
5558      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5559      icode = optab_handler (cstore_optab, optab_mode);
5560      if (icode != CODE_FOR_nothing)
5561         {
5562           do_pending_stack_adjust ();
5563           rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5564                                  unsignedp, op0, op1, normalizep, target_mode);
5565           if (tem)
5566             return tem;
5567
5568           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5569             {
5570               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5571                                  unsignedp, op1, op0, normalizep, target_mode);
5572               if (tem)
5573                 return tem;
5574             }
5575           break;
5576         }
5577     }
5578
5579   return 0;
5580 }
5581
5582 /* Subroutine of emit_store_flag that handles cases in which the operands
5583    are scalar integers.  SUBTARGET is the target to use for temporary
5584    operations and TRUEVAL is the value to store when the condition is
5585    true.  All other arguments are as for emit_store_flag.  */
5586
5587 rtx
5588 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5589                      rtx op1, machine_mode mode, int unsignedp,
5590                      int normalizep, rtx trueval)
5591 {
5592   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5593   rtx_insn *last = get_last_insn ();
5594   rtx tem;
5595
5596   /* If this is an equality comparison of integers, we can try to exclusive-or
5597      (or subtract) the two operands and use a recursive call to try the
5598      comparison with zero.  Don't do any of these cases if branches are
5599      very cheap.  */
5600
5601   if ((code == EQ || code == NE) && op1 != const0_rtx)
5602     {
5603       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5604                           OPTAB_WIDEN);
5605
5606       if (tem == 0)
5607         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5608                             OPTAB_WIDEN);
5609       if (tem != 0)
5610         tem = emit_store_flag (target, code, tem, const0_rtx,
5611                                mode, unsignedp, normalizep);
5612       if (tem != 0)
5613         return tem;
5614
5615       delete_insns_since (last);
5616     }
5617
5618   /* For integer comparisons, try the reverse comparison.  However, for
5619      small X and if we'd have anyway to extend, implementing "X != 0"
5620      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5621   rtx_code rcode = reverse_condition (code);
5622   if (can_compare_p (rcode, mode, ccp_store_flag)
5623       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5624             && code == NE
5625             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5626             && op1 == const0_rtx))
5627     {
5628       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5629                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5630
5631       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5632       if (want_add
5633           && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5634                        optimize_insn_for_speed_p ()) == 0)
5635         {
5636           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5637                                    STORE_FLAG_VALUE, target_mode);
5638           if (tem != 0)
5639             tem = expand_binop (target_mode, add_optab, tem,
5640                                 gen_int_mode (normalizep, target_mode),
5641                                 target, 0, OPTAB_WIDEN);
5642         }
5643       else if (!want_add
5644                && rtx_cost (trueval, mode, XOR, 1,
5645                             optimize_insn_for_speed_p ()) == 0)
5646         {
5647           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5648                                    normalizep, target_mode);
5649           if (tem != 0)
5650             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5651                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5652         }
5653
5654       if (tem != 0)
5655         return tem;
5656       delete_insns_since (last);
5657     }
5658
5659   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5660      the constant zero.  Reject all other comparisons at this point.  Only
5661      do LE and GT if branches are expensive since they are expensive on
5662      2-operand machines.  */
5663
5664   if (op1 != const0_rtx
5665       || (code != EQ && code != NE
5666           && (BRANCH_COST (optimize_insn_for_speed_p (),
5667                            false) <= 1 || (code != LE && code != GT))))
5668     return 0;
5669
5670   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5671      do the necessary operation below.  */
5672
5673   tem = 0;
5674
5675   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5676      the sign bit set.  */
5677
5678   if (code == LE)
5679     {
5680       /* This is destructive, so SUBTARGET can't be OP0.  */
5681       if (rtx_equal_p (subtarget, op0))
5682         subtarget = 0;
5683
5684       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5685                           OPTAB_WIDEN);
5686       if (tem)
5687         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5688                             OPTAB_WIDEN);
5689     }
5690
5691   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5692      number of bits in the mode of OP0, minus one.  */
5693
5694   if (code == GT)
5695     {
5696       if (rtx_equal_p (subtarget, op0))
5697         subtarget = 0;
5698
5699       tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5700                                 GET_MODE_BITSIZE (mode) - 1,
5701                                 subtarget, 0);
5702       if (tem)
5703         tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5704                             OPTAB_WIDEN);
5705     }
5706
5707   if (code == EQ || code == NE)
5708     {
5709       /* For EQ or NE, one way to do the comparison is to apply an operation
5710          that converts the operand into a positive number if it is nonzero
5711          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5712          for NE we negate.  This puts the result in the sign bit.  Then we
5713          normalize with a shift, if needed.
5714
5715          Two operations that can do the above actions are ABS and FFS, so try
5716          them.  If that doesn't work, and MODE is smaller than a full word,
5717          we can use zero-extension to the wider mode (an unsigned conversion)
5718          as the operation.  */
5719
5720       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5721          that is compensated by the subsequent overflow when subtracting
5722          one / negating.  */
5723
5724       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5725         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5726       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5727         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5728       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5729         {
5730           tem = convert_modes (word_mode, mode, op0, 1);
5731           mode = word_mode;
5732         }
5733
5734       if (tem != 0)
5735         {
5736           if (code == EQ)
5737             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5738                                 0, OPTAB_WIDEN);
5739           else
5740             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5741         }
5742
5743       /* If we couldn't do it that way, for NE we can "or" the two's complement
5744          of the value with itself.  For EQ, we take the one's complement of
5745          that "or", which is an extra insn, so we only handle EQ if branches
5746          are expensive.  */
5747
5748       if (tem == 0
5749           && (code == NE
5750               || BRANCH_COST (optimize_insn_for_speed_p (),
5751                               false) > 1))
5752         {
5753           if (rtx_equal_p (subtarget, op0))
5754             subtarget = 0;
5755
5756           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5757           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5758                               OPTAB_WIDEN);
5759
5760           if (tem && code == EQ)
5761             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5762         }
5763     }
5764
5765   if (tem && normalizep)
5766     tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5767                               GET_MODE_BITSIZE (mode) - 1,
5768                               subtarget, normalizep == 1);
5769
5770   if (tem)
5771     {
5772       if (!target)
5773         ;
5774       else if (GET_MODE (tem) != target_mode)
5775         {
5776           convert_move (target, tem, 0);
5777           tem = target;
5778         }
5779       else if (!subtarget)
5780         {
5781           emit_move_insn (target, tem);
5782           tem = target;
5783         }
5784     }
5785   else
5786     delete_insns_since (last);
5787
5788   return tem;
5789 }
5790
5791 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5792    and storing in TARGET.  Normally return TARGET.
5793    Return 0 if that cannot be done.
5794
5795    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5796    it is VOIDmode, they cannot both be CONST_INT.
5797
5798    UNSIGNEDP is for the case where we have to widen the operands
5799    to perform the operation.  It says to use zero-extension.
5800
5801    NORMALIZEP is 1 if we should convert the result to be either zero
5802    or one.  Normalize is -1 if we should convert the result to be
5803    either zero or -1.  If NORMALIZEP is zero, the result will be left
5804    "raw" out of the scc insn.  */
5805
5806 rtx
5807 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5808                  machine_mode mode, int unsignedp, int normalizep)
5809 {
5810   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5811   enum rtx_code rcode;
5812   rtx subtarget;
5813   rtx tem, trueval;
5814   rtx_insn *last;
5815
5816   /* If we compare constants, we shouldn't use a store-flag operation,
5817      but a constant load.  We can get there via the vanilla route that
5818      usually generates a compare-branch sequence, but will in this case
5819      fold the comparison to a constant, and thus elide the branch.  */
5820   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5821     return NULL_RTX;
5822
5823   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5824                            target_mode);
5825   if (tem)
5826     return tem;
5827
5828   /* If we reached here, we can't do this with a scc insn, however there
5829      are some comparisons that can be done in other ways.  Don't do any
5830      of these cases if branches are very cheap.  */
5831   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5832     return 0;
5833
5834   /* See what we need to return.  We can only return a 1, -1, or the
5835      sign bit.  */
5836
5837   if (normalizep == 0)
5838     {
5839       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5840         normalizep = STORE_FLAG_VALUE;
5841
5842       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5843         ;
5844       else
5845         return 0;
5846     }
5847
5848   last = get_last_insn ();
5849
5850   /* If optimizing, use different pseudo registers for each insn, instead
5851      of reusing the same pseudo.  This leads to better CSE, but slows
5852      down the compiler, since there are more pseudos.  */
5853   subtarget = (!optimize
5854                && (target_mode == mode)) ? target : NULL_RTX;
5855   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5856
5857   /* For floating-point comparisons, try the reverse comparison or try
5858      changing the "orderedness" of the comparison.  */
5859   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5860     {
5861       enum rtx_code first_code;
5862       bool and_them;
5863
5864       rcode = reverse_condition_maybe_unordered (code);
5865       if (can_compare_p (rcode, mode, ccp_store_flag)
5866           && (code == ORDERED || code == UNORDERED
5867               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5868               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5869         {
5870           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5871                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5872
5873           /* For the reverse comparison, use either an addition or a XOR.  */
5874           if (want_add
5875               && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5876                            optimize_insn_for_speed_p ()) == 0)
5877             {
5878               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5879                                        STORE_FLAG_VALUE, target_mode);
5880               if (tem)
5881                 return expand_binop (target_mode, add_optab, tem,
5882                                      gen_int_mode (normalizep, target_mode),
5883                                      target, 0, OPTAB_WIDEN);
5884             }
5885           else if (!want_add
5886                    && rtx_cost (trueval, mode, XOR, 1,
5887                                 optimize_insn_for_speed_p ()) == 0)
5888             {
5889               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5890                                        normalizep, target_mode);
5891               if (tem)
5892                 return expand_binop (target_mode, xor_optab, tem, trueval,
5893                                      target, INTVAL (trueval) >= 0,
5894                                      OPTAB_WIDEN);
5895             }
5896         }
5897
5898       delete_insns_since (last);
5899
5900       /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
5901       if (code == ORDERED || code == UNORDERED)
5902         return 0;
5903
5904       and_them = split_comparison (code, mode, &first_code, &code);
5905
5906       /* If there are no NaNs, the first comparison should always fall through.
5907          Effectively change the comparison to the other one.  */
5908       if (!HONOR_NANS (mode))
5909         {
5910           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5911           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5912                                     target_mode);
5913         }
5914
5915       if (!HAVE_conditional_move)
5916         return 0;
5917
5918       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5919          conditional move.  */
5920       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5921                                normalizep, target_mode);
5922       if (tem == 0)
5923         return 0;
5924
5925       if (and_them)
5926         tem = emit_conditional_move (target, code, op0, op1, mode,
5927                                      tem, const0_rtx, GET_MODE (tem), 0);
5928       else
5929         tem = emit_conditional_move (target, code, op0, op1, mode,
5930                                      trueval, tem, GET_MODE (tem), 0);
5931
5932       if (tem == 0)
5933         delete_insns_since (last);
5934       return tem;
5935     }
5936
5937   /* The remaining tricks only apply to integer comparisons.  */
5938
5939   if (GET_MODE_CLASS (mode) == MODE_INT)
5940     return emit_store_flag_int (target, subtarget, code, op0, op1, mode,
5941                                 unsignedp, normalizep, trueval);
5942
5943   return 0;
5944 }
5945
5946 /* Like emit_store_flag, but always succeeds.  */
5947
5948 rtx
5949 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5950                        machine_mode mode, int unsignedp, int normalizep)
5951 {
5952   rtx tem;
5953   rtx_code_label *label;
5954   rtx trueval, falseval;
5955
5956   /* First see if emit_store_flag can do the job.  */
5957   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5958   if (tem != 0)
5959     return tem;
5960
5961   if (!target)
5962     target = gen_reg_rtx (word_mode);
5963
5964   /* If this failed, we have to do this with set/compare/jump/set code.
5965      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5966   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5967   if (code == NE
5968       && GET_MODE_CLASS (mode) == MODE_INT
5969       && REG_P (target)
5970       && op0 == target
5971       && op1 == const0_rtx)
5972     {
5973       label = gen_label_rtx ();
5974       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5975                                NULL_RTX, NULL, label,
5976                                profile_probability::uninitialized ());
5977       emit_move_insn (target, trueval);
5978       emit_label (label);
5979       return target;
5980     }
5981
5982   if (!REG_P (target)
5983       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5984     target = gen_reg_rtx (GET_MODE (target));
5985
5986   /* Jump in the right direction if the target cannot implement CODE
5987      but can jump on its reverse condition.  */
5988   falseval = const0_rtx;
5989   if (! can_compare_p (code, mode, ccp_jump)
5990       && (! FLOAT_MODE_P (mode)
5991           || code == ORDERED || code == UNORDERED
5992           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5993           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5994     {
5995       enum rtx_code rcode;
5996       if (FLOAT_MODE_P (mode))
5997         rcode = reverse_condition_maybe_unordered (code);
5998       else
5999         rcode = reverse_condition (code);
6000
6001       /* Canonicalize to UNORDERED for the libcall.  */
6002       if (can_compare_p (rcode, mode, ccp_jump)
6003           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6004         {
6005           falseval = trueval;
6006           trueval = const0_rtx;
6007           code = rcode;
6008         }
6009     }
6010
6011   emit_move_insn (target, trueval);
6012   label = gen_label_rtx ();
6013   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6014                            label, profile_probability::uninitialized ());
6015
6016   emit_move_insn (target, falseval);
6017   emit_label (label);
6018
6019   return target;
6020 }
6021 \f
6022 /* Perform possibly multi-word comparison and conditional jump to LABEL
6023    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
6024    now a thin wrapper around do_compare_rtx_and_jump.  */
6025
6026 static void
6027 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6028                  rtx_code_label *label)
6029 {
6030   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6031   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6032                            NULL, label, profile_probability::uninitialized ());
6033 }