gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2016 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "backend.h"
  26 #include "target.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "predict.h"
  30 #include "tm_p.h"
  31 #include "expmed.h"
  32 #include "optabs.h"
  33 #include "emit-rtl.h"
  34 #include "diagnostic-core.h"
  35 #include "fold-const.h"
  36 #include "stor-layout.h"
  37 #include "dojump.h"
  38 #include "explow.h"
  39 #include "expr.h"
  40 #include "langhooks.h"
  41
  42 struct target_expmed default_target_expmed;
  43 #if SWITCHABLE_TARGET
  44 struct target_expmed *this_target_expmed = &default_target_expmed;
  45 #endif
  46
  47 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  48                                    unsigned HOST_WIDE_INT,
  49                                    unsigned HOST_WIDE_INT,
  50                                    unsigned HOST_WIDE_INT,
  51                                    rtx, bool);
  52 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
  53                                      unsigned HOST_WIDE_INT,
  54                                      rtx, bool);
  55 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  56                                    unsigned HOST_WIDE_INT,
  57                                    unsigned HOST_WIDE_INT,
  58                                    unsigned HOST_WIDE_INT,
  59                                    rtx, bool);
  60 static rtx extract_fixed_bit_field (machine_mode, rtx,
  61                                     unsigned HOST_WIDE_INT,
  62                                     unsigned HOST_WIDE_INT, rtx, int, bool);
  63 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
  64                                       unsigned HOST_WIDE_INT,
  65                                       unsigned HOST_WIDE_INT, rtx, int, bool);
  66 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
  67 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  68                                     unsigned HOST_WIDE_INT, int, bool);
  69 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
  70 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  71 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  72
  73 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  74    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  75    The mask is truncated if necessary to the width of mode MODE.  The
  76    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
  77
  78 static inline rtx
  79 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
  80 {
  81   return immed_wide_int_const
  82     (wi::shifted_mask (bitpos, bitsize, complement,
  83                        GET_MODE_PRECISION (mode)), mode);
  84 }
  85
  86 /* Test whether a value is zero of a power of two.  */
  87 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  88   (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
  89
  90 struct init_expmed_rtl
  91 {
  92   rtx reg;
  93   rtx plus;
  94   rtx neg;
  95   rtx mult;
  96   rtx sdiv;
  97   rtx udiv;
  98   rtx sdiv_32;
  99   rtx smod_32;
 100   rtx wide_mult;
 101   rtx wide_lshr;
 102   rtx wide_trunc;
 103   rtx shift;
 104   rtx shift_mult;
 105   rtx shift_add;
 106   rtx shift_sub0;
 107   rtx shift_sub1;
 108   rtx zext;
 109   rtx trunc;
 110
 111   rtx pow2[MAX_BITS_PER_WORD];
 112   rtx cint[MAX_BITS_PER_WORD];
 113 };
 114
 115 static void
 116 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
 117                       machine_mode from_mode, bool speed)
 118 {
 119   int to_size, from_size;
 120   rtx which;
 121
 122   to_size = GET_MODE_PRECISION (to_mode);
 123   from_size = GET_MODE_PRECISION (from_mode);
 124
 125   /* Most partial integers have a precision less than the "full"
 126      integer it requires for storage.  In case one doesn't, for
 127      comparison purposes here, reduce the bit size by one in that
 128      case.  */
 129   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 130       && exact_log2 (to_size) != -1)
 131     to_size --;
 132   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 133       && exact_log2 (from_size) != -1)
 134     from_size --;
 135
 136   /* Assume cost of zero-extend and sign-extend is the same.  */
 137   which = (to_size < from_size ? all->trunc : all->zext);
 138
 139   PUT_MODE (all->reg, from_mode);
 140   set_convert_cost (to_mode, from_mode, speed,
 141                     set_src_cost (which, to_mode, speed));
 142 }
 143
 144 static void
 145 init_expmed_one_mode (struct init_expmed_rtl *all,
 146                       machine_mode mode, int speed)
 147 {
 148   int m, n, mode_bitsize;
 149   machine_mode mode_from;
 150
 151   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 152
 153   PUT_MODE (all->reg, mode);
 154   PUT_MODE (all->plus, mode);
 155   PUT_MODE (all->neg, mode);
 156   PUT_MODE (all->mult, mode);
 157   PUT_MODE (all->sdiv, mode);
 158   PUT_MODE (all->udiv, mode);
 159   PUT_MODE (all->sdiv_32, mode);
 160   PUT_MODE (all->smod_32, mode);
 161   PUT_MODE (all->wide_trunc, mode);
 162   PUT_MODE (all->shift, mode);
 163   PUT_MODE (all->shift_mult, mode);
 164   PUT_MODE (all->shift_add, mode);
 165   PUT_MODE (all->shift_sub0, mode);
 166   PUT_MODE (all->shift_sub1, mode);
 167   PUT_MODE (all->zext, mode);
 168   PUT_MODE (all->trunc, mode);
 169
 170   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
 171   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
 172   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
 173   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
 174   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
 175
 176   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
 177                                      <= 2 * add_cost (speed, mode)));
 178   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
 179                                      <= 4 * add_cost (speed, mode)));
 180
 181   set_shift_cost (speed, mode, 0, 0);
 182   {
 183     int cost = add_cost (speed, mode);
 184     set_shiftadd_cost (speed, mode, 0, cost);
 185     set_shiftsub0_cost (speed, mode, 0, cost);
 186     set_shiftsub1_cost (speed, mode, 0, cost);
 187   }
 188
 189   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 190   for (m = 1; m < n; m++)
 191     {
 192       XEXP (all->shift, 1) = all->cint[m];
 193       XEXP (all->shift_mult, 1) = all->pow2[m];
 194
 195       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
 196       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
 197                                                        speed));
 198       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
 199                                                         speed));
 200       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
 201                                                         speed));
 202     }
 203
 204   if (SCALAR_INT_MODE_P (mode))
 205     {
 206       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 207            mode_from = (machine_mode)(mode_from + 1))
 208         init_expmed_one_conv (all, mode, mode_from, speed);
 209     }
 210   if (GET_MODE_CLASS (mode) == MODE_INT)
 211     {
 212       machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
 213       if (wider_mode != VOIDmode)
 214         {
 215           PUT_MODE (all->zext, wider_mode);
 216           PUT_MODE (all->wide_mult, wider_mode);
 217           PUT_MODE (all->wide_lshr, wider_mode);
 218           XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
 219
 220           set_mul_widen_cost (speed, wider_mode,
 221                               set_src_cost (all->wide_mult, wider_mode, speed));
 222           set_mul_highpart_cost (speed, mode,
 223                                  set_src_cost (all->wide_trunc, mode, speed));
 224         }
 225     }
 226 }
 227
 228 void
 229 init_expmed (void)
 230 {
 231   struct init_expmed_rtl all;
 232   machine_mode mode = QImode;
 233   int m, speed;
 234
 235   memset (&all, 0, sizeof all);
 236   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 237     {
 238       all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 239       all.cint[m] = GEN_INT (m);
 240     }
 241
 242   /* Avoid using hard regs in ways which may be unsupported.  */
 243   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 244   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 245   all.neg = gen_rtx_NEG (mode, all.reg);
 246   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 247   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 248   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 249   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 250   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 251   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 252   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 253   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 254   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 255   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 256   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 257   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 258   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 259   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 260   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 261
 262   for (speed = 0; speed < 2; speed++)
 263     {
 264       crtl->maybe_hot_insn_p = speed;
 265       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
 266
 267       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 268            mode = (machine_mode)(mode + 1))
 269         init_expmed_one_mode (&all, mode, speed);
 270
 271       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 272         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 273              mode = (machine_mode)(mode + 1))
 274           init_expmed_one_mode (&all, mode, speed);
 275
 276       if (MIN_MODE_VECTOR_INT != VOIDmode)
 277         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 278              mode = (machine_mode)(mode + 1))
 279           init_expmed_one_mode (&all, mode, speed);
 280     }
 281
 282   if (alg_hash_used_p ())
 283     {
 284       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 285       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 286     }
 287   else
 288     set_alg_hash_used_p (true);
 289   default_rtl_profile ();
 290
 291   ggc_free (all.trunc);
 292   ggc_free (all.shift_sub1);
 293   ggc_free (all.shift_sub0);
 294   ggc_free (all.shift_add);
 295   ggc_free (all.shift_mult);
 296   ggc_free (all.shift);
 297   ggc_free (all.wide_trunc);
 298   ggc_free (all.wide_lshr);
 299   ggc_free (all.wide_mult);
 300   ggc_free (all.zext);
 301   ggc_free (all.smod_32);
 302   ggc_free (all.sdiv_32);
 303   ggc_free (all.udiv);
 304   ggc_free (all.sdiv);
 305   ggc_free (all.mult);
 306   ggc_free (all.neg);
 307   ggc_free (all.plus);
 308   ggc_free (all.reg);
 309 }
 310
 311 /* Return an rtx representing minus the value of X.
 312    MODE is the intended mode of the result,
 313    useful if X is a CONST_INT.  */
 314
 315 rtx
 316 negate_rtx (machine_mode mode, rtx x)
 317 {
 318   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 319
 320   if (result == 0)
 321     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 322
 323   return result;
 324 }
 325
 326 /* Whether reverse storage order is supported on the target.  */
 327 static int reverse_storage_order_supported = -1;
 328
 329 /* Check whether reverse storage order is supported on the target.  */
 330
 331 static void
 332 check_reverse_storage_order_support (void)
 333 {
 334   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 335     {
 336       reverse_storage_order_supported = 0;
 337       sorry ("reverse scalar storage order");
 338     }
 339   else
 340     reverse_storage_order_supported = 1;
 341 }
 342
 343 /* Whether reverse FP storage order is supported on the target.  */
 344 static int reverse_float_storage_order_supported = -1;
 345
 346 /* Check whether reverse FP storage order is supported on the target.  */
 347
 348 static void
 349 check_reverse_float_storage_order_support (void)
 350 {
 351   if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 352     {
 353       reverse_float_storage_order_supported = 0;
 354       sorry ("reverse floating-point scalar storage order");
 355     }
 356   else
 357     reverse_float_storage_order_supported = 1;
 358 }
 359
 360 /* Return an rtx representing value of X with reverse storage order.
 361    MODE is the intended mode of the result,
 362    useful if X is a CONST_INT.  */
 363
 364 rtx
 365 flip_storage_order (enum machine_mode mode, rtx x)
 366 {
 367   enum machine_mode int_mode;
 368   rtx result;
 369
 370   if (mode == QImode)
 371     return x;
 372
 373   if (COMPLEX_MODE_P (mode))
 374     {
 375       rtx real = read_complex_part (x, false);
 376       rtx imag = read_complex_part (x, true);
 377
 378       real = flip_storage_order (GET_MODE_INNER (mode), real);
 379       imag = flip_storage_order (GET_MODE_INNER (mode), imag);
 380
 381       return gen_rtx_CONCAT (mode, real, imag);
 382     }
 383
 384   if (__builtin_expect (reverse_storage_order_supported < 0, 0))
 385     check_reverse_storage_order_support ();
 386
 387   if (SCALAR_INT_MODE_P (mode))
 388     int_mode = mode;
 389   else
 390     {
 391       if (FLOAT_MODE_P (mode)
 392           && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
 393         check_reverse_float_storage_order_support ();
 394
 395       int_mode = mode_for_size (GET_MODE_PRECISION (mode), MODE_INT, 0);
 396       if (int_mode == BLKmode)
 397         {
 398           sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
 399           return x;
 400         }
 401       x = gen_lowpart (int_mode, x);
 402     }
 403
 404   result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
 405   if (result == 0)
 406     result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
 407
 408   if (int_mode != mode)
 409     result = gen_lowpart (mode, result);
 410
 411   return result;
 412 }
 413
 414 /* Adjust bitfield memory MEM so that it points to the first unit of mode
 415    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
 416    If MODE is BLKmode, return a reference to every byte in the bitfield.
 417    Set *NEW_BITNUM to the bit position of the field within the new memory.  */
 418
 419 static rtx
 420 narrow_bit_field_mem (rtx mem, machine_mode mode,
 421                       unsigned HOST_WIDE_INT bitsize,
 422                       unsigned HOST_WIDE_INT bitnum,
 423                       unsigned HOST_WIDE_INT *new_bitnum)
 424 {
 425   if (mode == BLKmode)
 426     {
 427       *new_bitnum = bitnum % BITS_PER_UNIT;
 428       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 429       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 430                             / BITS_PER_UNIT);
 431       return adjust_bitfield_address_size (mem, mode, offset, size);
 432     }
 433   else
 434     {
 435       unsigned int unit = GET_MODE_BITSIZE (mode);
 436       *new_bitnum = bitnum % unit;
 437       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 438       return adjust_bitfield_address (mem, mode, offset);
 439     }
 440 }
 441
 442 /* The caller wants to perform insertion or extraction PATTERN on a
 443    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 444    BITREGION_START and BITREGION_END are as for store_bit_field
 445    and FIELDMODE is the natural mode of the field.
 446
 447    Search for a mode that is compatible with the memory access
 448    restrictions and (where applicable) with a register insertion or
 449    extraction.  Return the new memory on success, storing the adjusted
 450    bit position in *NEW_BITNUM.  Return null otherwise.  */
 451
 452 static rtx
 453 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 454                               rtx op0, HOST_WIDE_INT bitsize,
 455                               HOST_WIDE_INT bitnum,
 456                               unsigned HOST_WIDE_INT bitregion_start,
 457                               unsigned HOST_WIDE_INT bitregion_end,
 458                               machine_mode fieldmode,
 459                               unsigned HOST_WIDE_INT *new_bitnum)
 460 {
 461   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 462                                 bitregion_end, MEM_ALIGN (op0),
 463                                 MEM_VOLATILE_P (op0));
 464   machine_mode best_mode;
 465   if (iter.next_mode (&best_mode))
 466     {
 467       /* We can use a memory in BEST_MODE.  See whether this is true for
 468          any wider modes.  All other things being equal, we prefer to
 469          use the widest mode possible because it tends to expose more
 470          CSE opportunities.  */
 471       if (!iter.prefer_smaller_modes ())
 472         {
 473           /* Limit the search to the mode required by the corresponding
 474              register insertion or extraction instruction, if any.  */
 475           machine_mode limit_mode = word_mode;
 476           extraction_insn insn;
 477           if (get_best_reg_extraction_insn (&insn, pattern,
 478                                             GET_MODE_BITSIZE (best_mode),
 479                                             fieldmode))
 480             limit_mode = insn.field_mode;
 481
 482           machine_mode wider_mode;
 483           while (iter.next_mode (&wider_mode)
 484                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 485             best_mode = wider_mode;
 486         }
 487       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 488                                    new_bitnum);
 489     }
 490   return NULL_RTX;
 491 }
 492
 493 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 494    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 495    offset is then BITNUM / BITS_PER_UNIT.  */
 496
 497 static bool
 498 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
 499                      unsigned HOST_WIDE_INT bitsize,
 500                      machine_mode struct_mode)
 501 {
 502   if (BYTES_BIG_ENDIAN)
 503     return (bitnum % BITS_PER_UNIT == 0
 504             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
 505                 || (bitnum + bitsize) % BITS_PER_WORD == 0));
 506   else
 507     return bitnum % BITS_PER_WORD == 0;
 508 }
 509
 510 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 511    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 512    Return false if the access would touch memory outside the range
 513    BITREGION_START to BITREGION_END for conformance to the C++ memory
 514    model.  */
 515
 516 static bool
 517 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 518                             unsigned HOST_WIDE_INT bitnum,
 519                             machine_mode fieldmode,
 520                             unsigned HOST_WIDE_INT bitregion_start,
 521                             unsigned HOST_WIDE_INT bitregion_end)
 522 {
 523   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 524
 525   /* -fstrict-volatile-bitfields must be enabled and we must have a
 526      volatile MEM.  */
 527   if (!MEM_P (op0)
 528       || !MEM_VOLATILE_P (op0)
 529       || flag_strict_volatile_bitfields <= 0)
 530     return false;
 531
 532   /* Non-integral modes likely only happen with packed structures.
 533      Punt.  */
 534   if (!SCALAR_INT_MODE_P (fieldmode))
 535     return false;
 536
 537   /* The bit size must not be larger than the field mode, and
 538      the field mode must not be larger than a word.  */
 539   if (bitsize > modesize || modesize > BITS_PER_WORD)
 540     return false;
 541
 542   /* Check for cases of unaligned fields that must be split.  */
 543   if (bitnum % modesize + bitsize > modesize)
 544     return false;
 545
 546   /* The memory must be sufficiently aligned for a MODESIZE access.
 547      This condition guarantees, that the memory access will not
 548      touch anything after the end of the structure.  */
 549   if (MEM_ALIGN (op0) < modesize)
 550     return false;
 551
 552   /* Check for cases where the C++ memory model applies.  */
 553   if (bitregion_end != 0
 554       && (bitnum - bitnum % modesize < bitregion_start
 555           || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
 556     return false;
 557
 558   return true;
 559 }
 560
 561 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 562    bit number BITNUM can be treated as a simple value of mode MODE.  */
 563
 564 static bool
 565 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 566                        unsigned HOST_WIDE_INT bitnum, machine_mode mode)
 567 {
 568   return (MEM_P (op0)
 569           && bitnum % BITS_PER_UNIT == 0
 570           && bitsize == GET_MODE_BITSIZE (mode)
 571           && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
 572               || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
 573                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 574 }
 575 \f
 576 /* Try to use instruction INSV to store VALUE into a field of OP0.
 577    BITSIZE and BITNUM are as for store_bit_field.  */
 578
 579 static bool
 580 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 581                             unsigned HOST_WIDE_INT bitsize,
 582                             unsigned HOST_WIDE_INT bitnum,
 583                             rtx value)
 584 {
 585   struct expand_operand ops[4];
 586   rtx value1;
 587   rtx xop0 = op0;
 588   rtx_insn *last = get_last_insn ();
 589   bool copy_back = false;
 590
 591   machine_mode op_mode = insv->field_mode;
 592   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 593   if (bitsize == 0 || bitsize > unit)
 594     return false;
 595
 596   if (MEM_P (xop0))
 597     /* Get a reference to the first byte of the field.  */
 598     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 599                                  &bitnum);
 600   else
 601     {
 602       /* Convert from counting within OP0 to counting in OP_MODE.  */
 603       if (BYTES_BIG_ENDIAN)
 604         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 605
 606       /* If xop0 is a register, we need it in OP_MODE
 607          to make it acceptable to the format of insv.  */
 608       if (GET_CODE (xop0) == SUBREG)
 609         /* We can't just change the mode, because this might clobber op0,
 610            and we will need the original value of op0 if insv fails.  */
 611         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 612       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 613         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 614     }
 615
 616   /* If the destination is a paradoxical subreg such that we need a
 617      truncate to the inner mode, perform the insertion on a temporary and
 618      truncate the result to the original destination.  Note that we can't
 619      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 620      X) 0)) is (reg:N X).  */
 621   if (GET_CODE (xop0) == SUBREG
 622       && REG_P (SUBREG_REG (xop0))
 623       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 624                                          op_mode))
 625     {
 626       rtx tem = gen_reg_rtx (op_mode);
 627       emit_move_insn (tem, xop0);
 628       xop0 = tem;
 629       copy_back = true;
 630     }
 631
 632   /* There are similar overflow check at the start of store_bit_field_1,
 633      but that only check the situation where the field lies completely
 634      outside the register, while there do have situation where the field
 635      lies partialy in the register, we need to adjust bitsize for this
 636      partial overflow situation.  Without this fix, pr48335-2.c on big-endian
 637      will broken on those arch support bit insert instruction, like arm, aarch64
 638      etc.  */
 639   if (bitsize + bitnum > unit && bitnum < unit)
 640     {
 641       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
 642                "destination object, data truncated into %wu-bit",
 643                bitsize, unit - bitnum);
 644       bitsize = unit - bitnum;
 645     }
 646
 647   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 648      "backwards" from the size of the unit we are inserting into.
 649      Otherwise, we count bits from the most significant on a
 650      BYTES/BITS_BIG_ENDIAN machine.  */
 651
 652   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 653     bitnum = unit - bitsize - bitnum;
 654
 655   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 656   value1 = value;
 657   if (GET_MODE (value) != op_mode)
 658     {
 659       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 660         {
 661           rtx tmp;
 662           /* Optimization: Don't bother really extending VALUE
 663              if it has all the bits we will actually use.  However,
 664              if we must narrow it, be sure we do it correctly.  */
 665
 666           if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 667             {
 668               tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 669               if (! tmp)
 670                 tmp = simplify_gen_subreg (op_mode,
 671                                            force_reg (GET_MODE (value),
 672                                                       value1),
 673                                            GET_MODE (value), 0);
 674             }
 675           else
 676             {
 677               tmp = gen_lowpart_if_possible (op_mode, value1);
 678               if (! tmp)
 679                 tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value),
 680                                                        value1));
 681             }
 682           value1 = tmp;
 683         }
 684       else if (CONST_INT_P (value))
 685         value1 = gen_int_mode (INTVAL (value), op_mode);
 686       else
 687         /* Parse phase is supposed to make VALUE's data type
 688            match that of the component reference, which is a type
 689            at least as wide as the field; so VALUE should have
 690            a mode that corresponds to that type.  */
 691         gcc_assert (CONSTANT_P (value));
 692     }
 693
 694   create_fixed_operand (&ops[0], xop0);
 695   create_integer_operand (&ops[1], bitsize);
 696   create_integer_operand (&ops[2], bitnum);
 697   create_input_operand (&ops[3], value1, op_mode);
 698   if (maybe_expand_insn (insv->icode, 4, ops))
 699     {
 700       if (copy_back)
 701         convert_move (op0, xop0, true);
 702       return true;
 703     }
 704   delete_insns_since (last);
 705   return false;
 706 }
 707
 708 /* A subroutine of store_bit_field, with the same arguments.  Return true
 709    if the operation could be implemented.
 710
 711    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 712    no other way of implementing the operation.  If FALLBACK_P is false,
 713    return false instead.  */
 714
 715 static bool
 716 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 717                    unsigned HOST_WIDE_INT bitnum,
 718                    unsigned HOST_WIDE_INT bitregion_start,
 719                    unsigned HOST_WIDE_INT bitregion_end,
 720                    machine_mode fieldmode,
 721                    rtx value, bool reverse, bool fallback_p)
 722 {
 723   rtx op0 = str_rtx;
 724   rtx orig_value;
 725
 726   while (GET_CODE (op0) == SUBREG)
 727     {
 728       /* The following line once was done only if WORDS_BIG_ENDIAN,
 729          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 730          meaningful at a much higher level; when structures are copied
 731          between memory and regs, the higher-numbered regs
 732          always get higher addresses.  */
 733       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 734       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 735       int byte_offset = 0;
 736
 737       /* Paradoxical subregs need special handling on big-endian machines.  */
 738       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 739         {
 740           int difference = inner_mode_size - outer_mode_size;
 741
 742           if (WORDS_BIG_ENDIAN)
 743             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 744           if (BYTES_BIG_ENDIAN)
 745             byte_offset += difference % UNITS_PER_WORD;
 746         }
 747       else
 748         byte_offset = SUBREG_BYTE (op0);
 749
 750       bitnum += byte_offset * BITS_PER_UNIT;
 751       op0 = SUBREG_REG (op0);
 752     }
 753
 754   /* No action is needed if the target is a register and if the field
 755      lies completely outside that register.  This can occur if the source
 756      code contains an out-of-bounds access to a small array.  */
 757   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 758     return true;
 759
 760   /* Use vec_set patterns for inserting parts of vectors whenever
 761      available.  */
 762   if (VECTOR_MODE_P (GET_MODE (op0))
 763       && !MEM_P (op0)
 764       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 765       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 766       && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
 767       && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
 768     {
 769       struct expand_operand ops[3];
 770       machine_mode outermode = GET_MODE (op0);
 771       machine_mode innermode = GET_MODE_INNER (outermode);
 772       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 773       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 774
 775       create_fixed_operand (&ops[0], op0);
 776       create_input_operand (&ops[1], value, innermode);
 777       create_integer_operand (&ops[2], pos);
 778       if (maybe_expand_insn (icode, 3, ops))
 779         return true;
 780     }
 781
 782   /* If the target is a register, overwriting the entire object, or storing
 783      a full-word or multi-word field can be done with just a SUBREG.  */
 784   if (!MEM_P (op0)
 785       && bitsize == GET_MODE_BITSIZE (fieldmode)
 786       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 787           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 788     {
 789       /* Use the subreg machinery either to narrow OP0 to the required
 790          words or to cope with mode punning between equal-sized modes.
 791          In the latter case, use subreg on the rhs side, not lhs.  */
 792       rtx sub;
 793
 794       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 795         {
 796           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 797           if (sub)
 798             {
 799               if (reverse)
 800                 sub = flip_storage_order (GET_MODE (op0), sub);
 801               emit_move_insn (op0, sub);
 802               return true;
 803             }
 804         }
 805       else
 806         {
 807           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 808                                      bitnum / BITS_PER_UNIT);
 809           if (sub)
 810             {
 811               if (reverse)
 812                 value = flip_storage_order (fieldmode, value);
 813               emit_move_insn (sub, value);
 814               return true;
 815             }
 816         }
 817     }
 818
 819   /* If the target is memory, storing any naturally aligned field can be
 820      done with a simple store.  For targets that support fast unaligned
 821      memory, any naturally sized, unit aligned field can be done directly.  */
 822   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
 823     {
 824       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 825       if (reverse)
 826         value = flip_storage_order (fieldmode, value);
 827       emit_move_insn (op0, value);
 828       return true;
 829     }
 830
 831   /* Make sure we are playing with integral modes.  Pun with subregs
 832      if we aren't.  This must come after the entire register case above,
 833      since that case is valid for any mode.  The following cases are only
 834      valid for integral modes.  */
 835   {
 836     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 837     if (imode != GET_MODE (op0))
 838       {
 839         if (MEM_P (op0))
 840           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
 841         else
 842           {
 843             gcc_assert (imode != BLKmode);
 844             op0 = gen_lowpart (imode, op0);
 845           }
 846       }
 847   }
 848
 849   /* Storing an lsb-aligned field in a register
 850      can be done with a movstrict instruction.  */
 851
 852   if (!MEM_P (op0)
 853       && !reverse
 854       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 855       && bitsize == GET_MODE_BITSIZE (fieldmode)
 856       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 857     {
 858       struct expand_operand ops[2];
 859       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 860       rtx arg0 = op0;
 861       unsigned HOST_WIDE_INT subreg_off;
 862
 863       if (GET_CODE (arg0) == SUBREG)
 864         {
 865           /* Else we've got some float mode source being extracted into
 866              a different float mode destination -- this combination of
 867              subregs results in Severe Tire Damage.  */
 868           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 869                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 870                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 871           arg0 = SUBREG_REG (arg0);
 872         }
 873
 874       subreg_off = bitnum / BITS_PER_UNIT;
 875       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 876         {
 877           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 878
 879           create_fixed_operand (&ops[0], arg0);
 880           /* Shrink the source operand to FIELDMODE.  */
 881           create_convert_operand_to (&ops[1], value, fieldmode, false);
 882           if (maybe_expand_insn (icode, 2, ops))
 883             return true;
 884         }
 885     }
 886
 887   /* Handle fields bigger than a word.  */
 888
 889   if (bitsize > BITS_PER_WORD)
 890     {
 891       /* Here we transfer the words of the field
 892          in the order least significant first.
 893          This is because the most significant word is the one which may
 894          be less than full.
 895          However, only do that if the value is not BLKmode.  */
 896
 897       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 898       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 899       unsigned int i;
 900       rtx_insn *last;
 901
 902       /* This is the mode we must force value to, so that there will be enough
 903          subwords to extract.  Note that fieldmode will often (always?) be
 904          VOIDmode, because that is what store_field uses to indicate that this
 905          is a bit field, but passing VOIDmode to operand_subword_force
 906          is not allowed.  */
 907       fieldmode = GET_MODE (value);
 908       if (fieldmode == VOIDmode)
 909         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 910
 911       last = get_last_insn ();
 912       for (i = 0; i < nwords; i++)
 913         {
 914           /* If I is 0, use the low-order word in both field and target;
 915              if I is 1, use the next to lowest word; and so on.  */
 916           unsigned int wordnum = (backwards
 917                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 918                                   - i - 1
 919                                   : i);
 920           unsigned int bit_offset = (backwards ^ reverse
 921                                      ? MAX ((int) bitsize - ((int) i + 1)
 922                                             * BITS_PER_WORD,
 923                                             0)
 924                                      : (int) i * BITS_PER_WORD);
 925           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 926           unsigned HOST_WIDE_INT new_bitsize =
 927             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 928
 929           /* If the remaining chunk doesn't have full wordsize we have
 930              to make sure that for big-endian machines the higher order
 931              bits are used.  */
 932           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 933             value_word = simplify_expand_binop (word_mode, lshr_optab,
 934                                                 value_word,
 935                                                 GEN_INT (BITS_PER_WORD
 936                                                          - new_bitsize),
 937                                                 NULL_RTX, true,
 938                                                 OPTAB_LIB_WIDEN);
 939
 940           if (!store_bit_field_1 (op0, new_bitsize,
 941                                   bitnum + bit_offset,
 942                                   bitregion_start, bitregion_end,
 943                                   word_mode,
 944                                   value_word, reverse, fallback_p))
 945             {
 946               delete_insns_since (last);
 947               return false;
 948             }
 949         }
 950       return true;
 951     }
 952
 953   /* If VALUE has a floating-point or complex mode, access it as an
 954      integer of the corresponding size.  This can occur on a machine
 955      with 64 bit registers that uses SFmode for float.  It can also
 956      occur for unaligned float or complex fields.  */
 957   orig_value = value;
 958   if (GET_MODE (value) != VOIDmode
 959       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 960       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 961     {
 962       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 963       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 964     }
 965
 966   /* If OP0 is a multi-word register, narrow it to the affected word.
 967      If the region spans two words, defer to store_split_bit_field.  */
 968   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 969     {
 970       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
 971         {
 972           if (!fallback_p)
 973             return false;
 974
 975           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 976                                  bitregion_end, value, reverse);
 977           return true;
 978         }
 979       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 980                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 981       gcc_assert (op0);
 982       bitnum %= BITS_PER_WORD;
 983     }
 984
 985   /* From here on we can assume that the field to be stored in fits
 986      within a word.  If the destination is a register, it too fits
 987      in a word.  */
 988
 989   extraction_insn insv;
 990   if (!MEM_P (op0)
 991       && !reverse
 992       && get_best_reg_extraction_insn (&insv, EP_insv,
 993                                        GET_MODE_BITSIZE (GET_MODE (op0)),
 994                                        fieldmode)
 995       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 996     return true;
 997
 998   /* If OP0 is a memory, try copying it to a register and seeing if a
 999      cheap register alternative is available.  */
1000   if (MEM_P (op0) && !reverse)
1001     {
1002       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1003                                         fieldmode)
1004           && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1005         return true;
1006
1007       rtx_insn *last = get_last_insn ();
1008
1009       /* Try loading part of OP0 into a register, inserting the bitfield
1010          into that, and then copying the result back to OP0.  */
1011       unsigned HOST_WIDE_INT bitpos;
1012       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1013                                                bitregion_start, bitregion_end,
1014                                                fieldmode, &bitpos);
1015       if (xop0)
1016         {
1017           rtx tempreg = copy_to_reg (xop0);
1018           if (store_bit_field_1 (tempreg, bitsize, bitpos,
1019                                  bitregion_start, bitregion_end,
1020                                  fieldmode, orig_value, reverse, false))
1021             {
1022               emit_move_insn (xop0, tempreg);
1023               return true;
1024             }
1025           delete_insns_since (last);
1026         }
1027     }
1028
1029   if (!fallback_p)
1030     return false;
1031
1032   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
1033                          bitregion_end, value, reverse);
1034   return true;
1035 }
1036
 1037 /* Generate code to store value from rtx VALUE
 1038    into a bit-field within structure STR_RTX
 1039    containing BITSIZE bits starting at bit BITNUM.
 1040
 1041    BITREGION_START is bitpos of the first bitfield in this region.
 1042    BITREGION_END is the bitpos of the ending bitfield in this region.
 1043    These two fields are 0, if the C++ memory model does not apply,
 1044    or we are not interested in keeping track of bitfield regions.
 1045
 1046    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 1047
 1048    If REVERSE is true, the store is to be done in reverse order.

         Nothing is returned.  A store for which strict_volatile_bitfield_p
         holds is expanded right here as an access in FIELDMODE.  Every
         other store is handed to store_bit_field_1 with its fallbacks
         enabled; a false return from that call is treated as impossible
         (gcc_unreachable).  */
 1049
 1050 void
 1051 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 1052                  unsigned HOST_WIDE_INT bitnum,
 1053                  unsigned HOST_WIDE_INT bitregion_start,
 1054                  unsigned HOST_WIDE_INT bitregion_end,
 1055                  machine_mode fieldmode,
 1056                  rtx value, bool reverse)
 1057 {
 1058   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
 1059   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
 1060                                   bitregion_start, bitregion_end))
 1061     {
 1062       /* Storing of a full word can be done with a simple store.
 1063          We know here that the field can be accessed with one single
 1064          instruction.  For targets that support unaligned memory,
 1065          an unaligned access may be necessary.  */
 1066       if (bitsize == GET_MODE_BITSIZE (fieldmode))
 1067         {
 1068           str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
 1069                                              bitnum / BITS_PER_UNIT);
 1070           if (reverse)
 1071             value = flip_storage_order (fieldmode, value);
                /* The address above was formed from BITNUM / BITS_PER_UNIT,
                   so BITNUM has to be a whole number of bytes.  Note that
                   this is only checked after the address has been built.  */
 1072           gcc_assert (bitnum % BITS_PER_UNIT == 0);
 1073           emit_move_insn (str_rtx, value);
 1074         }
 1075       else
 1076         {
 1077           rtx temp;
 1078
                /* Read-modify-write in FIELDMODE: narrow the reference to a
                   FIELDMODE access (BITNUM is passed by address so that it
                   can be re-based on that access), load it into TEMP, insert
                   the field into TEMP with no bit-region limits (0, 0), and
                   write TEMP back with a single move.  */
 1079           str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
 1080                                           &bitnum);
 1081           gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
 1082           temp = copy_to_reg (str_rtx);
 1083           if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
 1084                                   fieldmode, value, reverse, true))
 1085             gcc_unreachable ();
 1086
 1087           emit_move_insn (str_rtx, temp);
 1088         }
 1089
 1090       return;
 1091     }
 1092
 1093   /* Under the C++0x memory model, we must not touch bits outside the
 1094      bit region.  Adjust the address to start at the beginning of the
 1095      bit region.  */
 1096   if (MEM_P (str_rtx) && bitregion_start > 0)
 1097     {
 1098       machine_mode bestmode;
 1099       HOST_WIDE_INT offset, size;
 1100
 1101       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
 1102
            /* Re-base everything on the first byte of the region: OFFSET is
               that byte, BITNUM and BITREGION_END become relative to it,
               BITREGION_START becomes 0, and SIZE is the number of bytes
               from there to the end of the field, rounded up.  The order
               matters: SIZE and the get_best_mode call use the re-based
               BITNUM.  */
 1103       offset = bitregion_start / BITS_PER_UNIT;
 1104       bitnum -= bitregion_start;
 1105       size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
 1106       bitregion_end -= bitregion_start;
 1107       bitregion_start = 0;
 1108       bestmode = get_best_mode (bitsize, bitnum,
 1109                                 bitregion_start, bitregion_end,
 1110                                 MEM_ALIGN (str_rtx), VOIDmode,
 1111                                 MEM_VOLATILE_P (str_rtx));
            /* NOTE(review): BESTMODE is used without checking it against
               VOIDmode; presumably adjust_bitfield_address_size accepts
               that -- confirm.  */
 1112       str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
 1113     }
 1114
        /* General case.  FALLBACK_P is true, so store_bit_field_1 is expected
           to succeed by one means or another.  */
 1115   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
 1116                           bitregion_start, bitregion_end,
 1117                           fieldmode, value, reverse, true))
 1118     gcc_unreachable ();
 1119 }
1120 \f
1121 /* Use shifts and boolean operations to store VALUE into a bit field of
1122    width BITSIZE in OP0, starting at bit BITNUM.
1123
1124    If REVERSE is true, the store is to be done in reverse order.  */
1125
1126 static void
1127 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1128                        unsigned HOST_WIDE_INT bitnum,
1129                        unsigned HOST_WIDE_INT bitregion_start,
1130                        unsigned HOST_WIDE_INT bitregion_end,
1131                        rtx value, bool reverse)
1132 {
1133   /* There is a case not handled here:
1134      a structure with a known alignment of just a halfword
1135      and a field split across two aligned halfwords within the structure.
1136      Or likewise a structure with a known alignment of just a byte
1137      and a field split across two bytes.
1138      Such cases are not supposed to be able to occur.  */
1139
1140   if (MEM_P (op0))
1141     {
1142       machine_mode mode = GET_MODE (op0);
1143       if (GET_MODE_BITSIZE (mode) == 0
1144           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1145         mode = word_mode;
1146       mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1147                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1148
1149       if (mode == VOIDmode)
1150         {
1151           /* The only way this should occur is if the field spans word
1152              boundaries.  */
1153           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1154                                  bitregion_end, value, reverse);
1155           return;
1156         }
1157
1158       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1159     }
1160
1161   store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
1162 }
1163
 1164 /* Helper function for store_fixed_bit_field, stores
 1165    the bit field always using the MODE of OP0.

         OP0 is the destination; its mode must be a scalar integer mode
         (this is asserted).  BITSIZE is the width of the field and BITNUM
         its position within OP0.  VALUE is the value to store and may be a
         CONST_INT.  If REVERSE is true, the store is to be done in reverse
         order.

         The expansion shifts VALUE into position, ANDs the field's bits out
         of a register copy of OP0, IORs the shifted value in, and moves the
         result back to OP0.  */
 1166
 1167 static void
 1168 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
 1169                          unsigned HOST_WIDE_INT bitnum,
 1170                          rtx value, bool reverse)
 1171 {
 1172   machine_mode mode;
 1173   rtx temp;
        /* These are set only for a CONST_INT VALUE whose low BITSIZE bits are
           all clear (ALL_ZERO) or all set (ALL_ONE).  ALL_ONE lets the AND
           below be skipped and ALL_ZERO lets the IOR be skipped.  */
 1174   int all_zero = 0;
 1175   int all_one = 0;
 1176
 1177   mode = GET_MODE (op0);
 1178   gcc_assert (SCALAR_INT_MODE_P (mode));
 1179
 1180   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
 1181      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
 1182
 1183   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
 1184     /* BITNUM is the distance between our msb
 1185        and that of the containing datum.
 1186        Convert it to the distance from the lsb.  */
 1187     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
 1188
 1189   /* Now BITNUM is always the distance between our lsb
 1190      and that of OP0.  */
 1191
 1192   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
 1193      we must first convert its mode to MODE.  */
 1194
 1195   if (CONST_INT_P (value))
 1196     {
 1197       unsigned HOST_WIDE_INT v = UINTVAL (value);
 1198
            /* Keep only the low BITSIZE bits.  The guard avoids shifting a
               HOST_WIDE_INT by its full width.  */
 1199       if (bitsize < HOST_BITS_PER_WIDE_INT)
 1200         v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;
 1201
 1202       if (v == 0)
 1203         all_zero = 1;
 1204       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 1205                 && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
 1206                || (bitsize == HOST_BITS_PER_WIDE_INT
 1207                    && v == (unsigned HOST_WIDE_INT) -1))
 1208         all_one = 1;
 1209
 1210       value = lshift_value (mode, v, bitnum);
 1211     }
 1212   else
 1213     {
            /* This has to be computed before VALUE is converted below, since
               it looks at VALUE's original mode.  No masking is requested
               when that mode is exactly BITSIZE bits wide, nor when the field
               ends at the top bit of MODE (presumably because the left shift
               then discards the excess high bits).  */
 1214       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 1215                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
 1216
 1217       if (GET_MODE (value) != mode)
 1218         value = convert_to_mode (mode, value, 1);
 1219
 1220       if (must_and)
 1221         value = expand_binop (mode, and_optab, value,
 1222                               mask_rtx (mode, 0, bitsize, 0),
 1223                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 1224       if (bitnum > 0)
 1225         value = expand_shift (LSHIFT_EXPR, mode, value,
 1226                               bitnum, NULL_RTX, 1);
 1227     }
 1228
        /* For a reversed store the positioned value is flipped here, and the
           clearing mask is flipped the same way below, so both line up with
           the unflipped copy of OP0 held in TEMP.  */
 1229   if (reverse)
 1230     value = flip_storage_order (mode, value);
 1231
 1232   /* Now clear the chosen bits in OP0,
 1233      except that if VALUE is -1 we need not bother.  */
 1234   /* We keep the intermediates in registers to allow CSE to combine
 1235      consecutive bitfield assignments.  */
 1236
 1237   temp = force_reg (mode, op0);
 1238
 1239   if (! all_one)
 1240     {
 1241       rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
 1242       if (reverse)
 1243         mask = flip_storage_order (mode, mask);
 1244       temp = expand_binop (mode, and_optab, temp, mask,
 1245                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
 1246       temp = force_reg (mode, temp);
 1247     }
 1248
 1249   /* Now logical-or VALUE into OP0, unless it is zero.  */
 1250
 1251   if (! all_zero)
 1252     {
 1253       temp = expand_binop (mode, ior_optab, temp, value,
 1254                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
 1255       temp = force_reg (mode, temp);
 1256     }
 1257
        /* Write the combined result back unless the operations above already
           left it in OP0 itself.  The destination is a copy of the OP0 rtx
           rather than the rtx that was passed in.  */
 1258   if (op0 != temp)
 1259     {
 1260       op0 = copy_rtx (op0);
 1261       emit_move_insn (op0, temp);
 1262     }
 1263 }
1264 \f
 1265 /* Store a bit field that is split across multiple accessible memory objects.
 1266
 1267    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 1268    BITSIZE is the field width; BITPOS the position of its first bit
 1269    (within the word).
 1270    VALUE is the value to store.
         BITREGION_START and BITREGION_END are passed on to
         store_fixed_bit_field for every piece; a nonzero BITREGION_END also
         limits the size of the chunks used for a destination that is not a
         register.
 1271
 1272    If REVERSE is true, the store is to be done in reverse order.
 1273
 1274    This does not yet handle fields wider than BITS_PER_WORD.

         The field is stored piece by piece: each iteration of the loop takes
         the next piece of VALUE and stores it with store_fixed_bit_field
         into the chunk of OP0 that contains it.  */
 1275
 1276 static void
 1277 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 1278                        unsigned HOST_WIDE_INT bitpos,
 1279                        unsigned HOST_WIDE_INT bitregion_start,
 1280                        unsigned HOST_WIDE_INT bitregion_end,
 1281                        rtx value, bool reverse)
 1282 {
        /* UNIT is the size in bits of the chunks of OP0 that are written one
           at a time, TOTAL_BITS the width of VALUE's mode, and BITSDONE the
           number of bits of the field stored so far.  */
 1283   unsigned int unit, total_bits, bitsdone = 0;
 1284
 1285   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 1286      much at a time.  */
 1287   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 1288     unit = BITS_PER_WORD;
 1289   else
 1290     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
 1291
 1292   /* If OP0 is a memory with a mode, then UNIT must not be larger than
 1293      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
 1294      again, and we will mutually recurse forever.  */
 1295   if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
 1296     unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
 1297
 1298   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 1299      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 1300      that VALUE might be a floating-point constant.  */
 1301   if (CONSTANT_P (value) && !CONST_INT_P (value))
 1302     {
 1303       rtx word = gen_lowpart_common (word_mode, value);
 1304
 1305       if (word && (value != word))
 1306         value = word;
 1307       else
 1308         value = gen_lowpart_common (word_mode,
 1309                                     force_reg (GET_MODE (value) != VOIDmode
 1310                                                ? GET_MODE (value)
 1311                                                : word_mode, value));
 1312     }
 1313
 1314   total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 1315
 1316   while (bitsdone < bitsize)
 1317     {
            /* THISSIZE is the width of the piece stored by this iteration,
               THISPOS its bit position within its chunk, and OFFSET the
               index of that chunk, counted in UNITs.  */
 1318       unsigned HOST_WIDE_INT thissize;
 1319       unsigned HOST_WIDE_INT thispos;
 1320       unsigned HOST_WIDE_INT offset;
 1321       rtx part, word;
 1322
 1323       offset = (bitpos + bitsdone) / unit;
 1324       thispos = (bitpos + bitsdone) % unit;
 1325
 1326       /* When region of bytes we can touch is restricted, decrease
 1327          UNIT close to the end of the region as needed.  If op0 is a REG
 1328          or SUBREG of REG, don't do this, as there can't be data races
 1329          on a register and we can expand shorter code in some cases.  */
 1330       if (bitregion_end
 1331           && unit > BITS_PER_UNIT
 1332           && bitpos + bitsdone - thispos + unit > bitregion_end + 1
 1333           && !REG_P (op0)
 1334           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
 1335         {
                /* Nothing is stored on this pass.  The iteration is restarted
                   so that OFFSET and THISPOS are recomputed for the halved
                   UNIT; BITSDONE is unchanged.  */
 1336           unit = unit / 2;
 1337           continue;
 1338         }
 1339
 1340       /* THISSIZE must not overrun a word boundary.  Otherwise,
 1341          store_fixed_bit_field will call us again, and we will mutually
 1342          recurse forever.  */
 1343       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 1344       thissize = MIN (thissize, unit - thispos);
 1345
            /* Fetch into PART the THISSIZE bits of VALUE that belong to this
               piece.  A CONST_INT is sliced at compile time with a shift and
               a mask; anything else goes through extract_fixed_bit_field.
               NOTE(review): both CONST_INT masks are built as
               ((HOST_WIDE_INT) 1 << thissize) - 1 in the signed type.  If
               THISSIZE can reach HOST_BITS_PER_WIDE_INT - 1 the subtraction
               would overflow -- confirm whether that size is reachable here.  */
 1346       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
 1347         {
 1348           /* Fetch successively less significant portions.  */
 1349           if (CONST_INT_P (value))
 1350             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 1351                              >> (bitsize - bitsdone - thissize))
 1352                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 1353           /* Likewise, but the source is little-endian.  */
 1354           else if (reverse)
 1355             part = extract_fixed_bit_field (word_mode, value, thissize,
 1356                                             bitsize - bitsdone - thissize,
 1357                                             NULL_RTX, 1, false);
 1358           else
 1359             {
                    /* NOTE(review): this declaration shadows the function-level
                       TOTAL_BITS, which was computed from the same expression
                       before the loop.  */
 1360               int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 1361               /* The args are chosen so that the last part includes the
 1362                  lsb.  Give extract_bit_field the value it needs (with
 1363                  endianness compensation) to fetch the piece we want.  */
 1364               part = extract_fixed_bit_field (word_mode, value, thissize,
 1365                                               total_bits - bitsize + bitsdone,
 1366                                               NULL_RTX, 1, false);
 1367             }
 1368         }
 1369       else
 1370         {
 1371           /* Fetch successively more significant portions.  */
 1372           if (CONST_INT_P (value))
 1373             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 1374                              >> bitsdone)
 1375                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 1376           /* Likewise, but the source is big-endian.  */
 1377           else if (reverse)
 1378             part = extract_fixed_bit_field (word_mode, value, thissize,
 1379                                             total_bits - bitsdone - thissize,
 1380                                             NULL_RTX, 1, false);
 1381           else
 1382             part = extract_fixed_bit_field (word_mode, value, thissize,
 1383                                             bitsdone, NULL_RTX, 1, false);
 1384         }
 1385
 1386       /* If OP0 is a register, then handle OFFSET here.  */
 1387       if (SUBREG_P (op0) || REG_P (op0))
 1388         {
 1389           machine_mode op0_mode = GET_MODE (op0);
                /* A register narrower than a word holds only chunk 0; any
                   other OFFSET is marked with const0_rtx and skipped
                   below.  */
 1390           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
 1391             word = offset ? const0_rtx : op0;
 1392           else
 1393             word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
 1394                                           GET_MODE (op0));
                /* WORD now selects the word, so keep only the index of the
                   chunk within that word (this assumes BITS_PER_WORD / UNIT
                   is a power of two -- TODO confirm).  */
 1395           offset &= BITS_PER_WORD / unit - 1;
 1396         }
 1397       else
 1398         word = op0;
 1399
 1400       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
 1401          it is just an out-of-bounds access.  Ignore it.  */
 1402       if (word != const0_rtx)
 1403         store_fixed_bit_field (word, thissize, offset * unit + thispos,
 1404                                bitregion_start, bitregion_end, part,
 1405                                reverse);
 1406       bitsdone += thissize;
 1407     }
 1408 }
1409 \f
1410 /* A subroutine of extract_bit_field_1 that converts return value X
1411    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1412    to extract_bit_field.  */
1413
1414 static rtx
1415 convert_extracted_bit_field (rtx x, machine_mode mode,
1416                              machine_mode tmode, bool unsignedp)
1417 {
1418   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1419     return x;
1420
1421   /* If the x mode is not a scalar integral, first convert to the
1422      integer mode of that size and then access it as a floating-point
1423      value via a SUBREG.  */
1424   if (!SCALAR_INT_MODE_P (tmode))
1425     {
1426       machine_mode smode;
1427
1428       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1429       x = convert_to_mode (smode, x, unsignedp);
1430       x = force_reg (smode, x);
1431       return gen_lowpart (tmode, x);
1432     }
1433
1434   return convert_to_mode (tmode, x, unsignedp);
1435 }
1436
 1437 /* Try to use an ext(z)v pattern to extract a field from OP0.
 1438    Return the extracted value on success, otherwise return null.
 1439    EXT_MODE is the mode of the extraction and the other arguments
 1440    are as for extract_bit_field.

         EXTV describes the pattern to use: EXT_MODE is taken from
         EXTV->field_mode, a memory OP0 is narrowed to EXTV->struct_mode, and
         EXTV->icode is the instruction that gets expanded.  TARGET may be
         null, in which case a new TMODE register is allocated.

         Null is returned without emitting anything if BITSIZE is zero or
         exceeds the width of EXT_MODE, or if OP0 is a SUBREG whose mode is
         not EXT_MODE; it is also returned if the instruction cannot be
         expanded.  */
 1441
 1442 static rtx
 1443 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
 1444                               unsigned HOST_WIDE_INT bitsize,
 1445                               unsigned HOST_WIDE_INT bitnum,
 1446                               int unsignedp, rtx target,
 1447                               machine_mode mode, machine_mode tmode)
 1448 {
 1449   struct expand_operand ops[4];
        /* SPEC_TARGET remembers the target in the caller's mode.
           SPEC_TARGET_SUBREG, when set, is the EXT_MODE low-part view of
           SPEC_TARGET that is offered to the instruction instead.  Both are
           compared against the operand the expansion actually wrote in order
           to decide what to return.  */
 1450   rtx spec_target = target;
 1451   rtx spec_target_subreg = 0;
 1452   machine_mode ext_mode = extv->field_mode;
 1453   unsigned unit = GET_MODE_BITSIZE (ext_mode);
 1454
 1455   if (bitsize == 0 || unit < bitsize)
 1456     return NULL_RTX;
 1457
 1458   if (MEM_P (op0))
 1459     /* Get a reference to the first byte of the field.  */
 1460     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
 1461                                 &bitnum);
 1462   else
 1463     {
 1464       /* Convert from counting within OP0 to counting in EXT_MODE.  */
 1465       if (BYTES_BIG_ENDIAN)
 1466         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 1467
 1468       /* If op0 is a register, we need it in EXT_MODE to make it
 1469          acceptable to the format of ext(z)v.  */
 1470       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
 1471         return NULL_RTX;
 1472       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
 1473         op0 = gen_lowpart_SUBREG (ext_mode, op0);
 1474     }
 1475
 1476   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 1477      "backwards" from the size of the unit we are extracting from.
 1478      Otherwise, we count bits from the most significant on a
 1479      BYTES/BITS_BIG_ENDIAN machine.  */
 1480
 1481   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 1482     bitnum = unit - bitsize - bitnum;
 1483
 1484   if (target == 0)
 1485     target = spec_target = gen_reg_rtx (tmode);
 1486
 1487   if (GET_MODE (target) != ext_mode)
 1488     {
 1489       /* Don't use LHS paradoxical subreg if explicit truncation is needed
 1490          between the mode of the extraction (word_mode) and the target
 1491          mode.  Instead, create a temporary and use convert_move to set
 1492          the target.  */
 1493       if (REG_P (target)
 1494           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
 1495         {
 1496           target = gen_lowpart (ext_mode, target);
                /* Record the view only when EXT_MODE has more precision than
                   the caller's target, so that a result written through it
                   can be returned as SPEC_TARGET below.  */
 1497           if (GET_MODE_PRECISION (ext_mode)
 1498               > GET_MODE_PRECISION (GET_MODE (spec_target)))
 1499             spec_target_subreg = target;
 1500         }
 1501       else
 1502         target = gen_reg_rtx (ext_mode);
 1503     }
 1504
        /* Operands of the pattern: 0 is the destination, 1 the structure,
           2 the field width and 3 the bit position.  */
 1505   create_output_operand (&ops[0], target, ext_mode);
 1506   create_fixed_operand (&ops[1], op0);
 1507   create_integer_operand (&ops[2], bitsize);
 1508   create_integer_operand (&ops[3], bitnum);
 1509   if (maybe_expand_insn (extv->icode, 4, ops))
 1510     {
            /* The result is read back from ops[0].value, which need not be
               the TARGET that was offered.  If it is the caller's target, or
               the recorded low-part view of it, return the caller's target
               directly; otherwise convert the result to MODE or TMODE.  */
 1511       target = ops[0].value;
 1512       if (target == spec_target)
 1513         return target;
 1514       if (target == spec_target_subreg)
 1515         return spec_target;
 1516       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
 1517     }
 1518   return NULL_RTX;
 1519 }
1520
1521 /* A subroutine of extract_bit_field, with the same arguments.
1522    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1523    if we can find no other means of implementing the operation.
 1524    If FALLBACK_P is false, return NULL instead.  */
1525
1526 static rtx
1527 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1528                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1529                      machine_mode mode, machine_mode tmode,
1530                      bool reverse, bool fallback_p)
1531 {
1532   rtx op0 = str_rtx;
1533   machine_mode int_mode;
1534   machine_mode mode1;
1535
1536   if (tmode == VOIDmode)
1537     tmode = mode;
1538
1539   while (GET_CODE (op0) == SUBREG)
1540     {
1541       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1542       op0 = SUBREG_REG (op0);
1543     }
1544
1545   /* If we have an out-of-bounds access to a register, just return an
1546      uninitialized register of the required mode.  This can occur if the
1547      source code contains an out-of-bounds access to a small array.  */
1548   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1549     return gen_reg_rtx (tmode);
1550
1551   if (REG_P (op0)
1552       && mode == GET_MODE (op0)
1553       && bitnum == 0
1554       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1555     {
1556       if (reverse)
1557         op0 = flip_storage_order (mode, op0);
1558       /* We're trying to extract a full register from itself.  */
1559       return op0;
1560     }
1561
1562   /* See if we can get a better vector mode before extracting.  */
1563   if (VECTOR_MODE_P (GET_MODE (op0))
1564       && !MEM_P (op0)
1565       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1566     {
1567       machine_mode new_mode;
1568
1569       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1570         new_mode = MIN_MODE_VECTOR_FLOAT;
1571       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1572         new_mode = MIN_MODE_VECTOR_FRACT;
1573       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1574         new_mode = MIN_MODE_VECTOR_UFRACT;
1575       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1576         new_mode = MIN_MODE_VECTOR_ACCUM;
1577       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1578         new_mode = MIN_MODE_VECTOR_UACCUM;
1579       else
1580         new_mode = MIN_MODE_VECTOR_INT;
1581
1582       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1583         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1584             && targetm.vector_mode_supported_p (new_mode))
1585           break;
1586       if (new_mode != VOIDmode)
1587         op0 = gen_lowpart (new_mode, op0);
1588     }
1589
1590   /* Use vec_extract patterns for extracting parts of vectors whenever
1591      available.  */
1592   if (VECTOR_MODE_P (GET_MODE (op0))
1593       && !MEM_P (op0)
1594       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1595       && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1596           == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1597     {
1598       struct expand_operand ops[3];
1599       machine_mode outermode = GET_MODE (op0);
1600       machine_mode innermode = GET_MODE_INNER (outermode);
1601       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1602       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1603
1604       create_output_operand (&ops[0], target, innermode);
1605       create_input_operand (&ops[1], op0, outermode);
1606       create_integer_operand (&ops[2], pos);
1607       if (maybe_expand_insn (icode, 3, ops))
1608         {
1609           target = ops[0].value;
1610           if (GET_MODE (target) != mode)
1611             return gen_lowpart (tmode, target);
1612           return target;
1613         }
1614     }
1615
1616   /* Make sure we are playing with integral modes.  Pun with subregs
1617      if we aren't.  */
1618   {
1619     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1620     if (imode != GET_MODE (op0))
1621       {
1622         if (MEM_P (op0))
1623           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1624         else if (imode != BLKmode)
1625           {
1626             op0 = gen_lowpart (imode, op0);
1627
1628             /* If we got a SUBREG, force it into a register since we
1629                aren't going to be able to do another SUBREG on it.  */
1630             if (GET_CODE (op0) == SUBREG)
1631               op0 = force_reg (imode, op0);
1632           }
1633         else
1634           {
1635             HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1636             rtx mem = assign_stack_temp (GET_MODE (op0), size);
1637             emit_move_insn (mem, op0);
1638             op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1639           }
1640       }
1641   }
1642
1643   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1644      If that's wrong, the solution is to test for it and set TARGET to 0
1645      if needed.  */
1646
1647   /* Get the mode of the field to use for atomic access or subreg
1648      conversion.  */
1649   mode1 = mode;
1650   if (SCALAR_INT_MODE_P (tmode))
1651     {
1652       machine_mode try_mode = mode_for_size (bitsize,
1653                                                   GET_MODE_CLASS (tmode), 0);
1654       if (try_mode != BLKmode)
1655         mode1 = try_mode;
1656     }
1657   gcc_assert (mode1 != BLKmode);
1658
1659   /* Extraction of a full MODE1 value can be done with a subreg as long
1660      as the least significant bit of the value is the least significant
1661      bit of either OP0 or a word of OP0.  */
1662   if (!MEM_P (op0)
1663       && !reverse
1664       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1665       && bitsize == GET_MODE_BITSIZE (mode1)
1666       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1667     {
1668       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1669                                      bitnum / BITS_PER_UNIT);
1670       if (sub)
1671         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1672     }
1673
1674   /* Extraction of a full MODE1 value can be done with a load as long as
1675      the field is on a byte boundary and is sufficiently aligned.  */
1676   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1677     {
1678       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1679       if (reverse)
1680         op0 = flip_storage_order (mode1, op0);
1681       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1682     }
1683
1684   /* Handle fields bigger than a word.  */
1685
1686   if (bitsize > BITS_PER_WORD)
1687     {
1688       /* Here we transfer the words of the field
1689          in the order least significant first.
1690          This is because the most significant word is the one which may
1691          be less than full.  */
1692
1693       const bool backwards = WORDS_BIG_ENDIAN;
1694       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1695       unsigned int i;
1696       rtx_insn *last;
1697
1698       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1699         target = gen_reg_rtx (mode);
1700
1701       /* In case we're about to clobber a base register or something
1702          (see gcc.c-torture/execute/20040625-1.c).   */
1703       if (reg_mentioned_p (target, str_rtx))
1704         target = gen_reg_rtx (mode);
1705
1706       /* Indicate for flow that the entire target reg is being set.  */
1707       emit_clobber (target);
1708
1709       last = get_last_insn ();
1710       for (i = 0; i < nwords; i++)
1711         {
1712           /* If I is 0, use the low-order word in both field and target;
1713              if I is 1, use the next to lowest word; and so on.  */
1714           /* Word number in TARGET to use.  */
1715           unsigned int wordnum
1716             = (backwards
1717                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1718                : i);
1719           /* Offset from start of field in OP0.  */
1720           unsigned int bit_offset = (backwards ^ reverse
1721                                      ? MAX ((int) bitsize - ((int) i + 1)
1722                                             * BITS_PER_WORD,
1723                                             0)
1724                                      : (int) i * BITS_PER_WORD);
1725           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1726           rtx result_part
1727             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1728                                              bitsize - i * BITS_PER_WORD),
1729                                    bitnum + bit_offset, 1, target_part,
1730                                    mode, word_mode, reverse, fallback_p);
1731
1732           gcc_assert (target_part);
1733           if (!result_part)
1734             {
1735               delete_insns_since (last);
1736               return NULL;
1737             }
1738
1739           if (result_part != target_part)
1740             emit_move_insn (target_part, result_part);
1741         }
1742
1743       if (unsignedp)
1744         {
1745           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1746              need to be zero'd out.  */
1747           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1748             {
1749               unsigned int i, total_words;
1750
1751               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1752               for (i = nwords; i < total_words; i++)
1753                 emit_move_insn
1754                   (operand_subword (target,
1755                                     backwards ? total_words - i - 1 : i,
1756                                     1, VOIDmode),
1757                    const0_rtx);
1758             }
1759           return target;
1760         }
1761
1762       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1763       target = expand_shift (LSHIFT_EXPR, mode, target,
1764                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1765       return expand_shift (RSHIFT_EXPR, mode, target,
1766                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1767     }
1768
1769   /* If OP0 is a multi-word register, narrow it to the affected word.
1770      If the region spans two words, defer to extract_split_bit_field.  */
1771   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1772     {
1773       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1774         {
1775           if (!fallback_p)
1776             return NULL_RTX;
1777           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1778                                             reverse);
1779           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1780         }
1781       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1782                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1783       bitnum %= BITS_PER_WORD;
1784     }
1785
1786   /* From here on we know the desired field is smaller than a word.
1787      If OP0 is a register, it too fits within a word.  */
1788   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1789   extraction_insn extv;
1790   if (!MEM_P (op0)
1791       && !reverse
1792       /* ??? We could limit the structure size to the part of OP0 that
1793          contains the field, with appropriate checks for endianness
1794          and TRULY_NOOP_TRUNCATION.  */
1795       && get_best_reg_extraction_insn (&extv, pattern,
1796                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1797                                        tmode))
1798     {
1799       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1800                                                  unsignedp, target, mode,
1801                                                  tmode);
1802       if (result)
1803         return result;
1804     }
1805
1806   /* If OP0 is a memory, try copying it to a register and seeing if a
1807      cheap register alternative is available.  */
1808   if (MEM_P (op0) & !reverse)
1809     {
1810       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1811                                         tmode))
1812         {
1813           rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1814                                                      bitnum, unsignedp,
1815                                                      target, mode,
1816                                                      tmode);
1817           if (result)
1818             return result;
1819         }
1820
1821       rtx_insn *last = get_last_insn ();
1822
1823       /* Try loading part of OP0 into a register and extracting the
1824          bitfield from that.  */
1825       unsigned HOST_WIDE_INT bitpos;
1826       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1827                                                0, 0, tmode, &bitpos);
1828       if (xop0)
1829         {
1830           xop0 = copy_to_reg (xop0);
1831           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1832                                             unsignedp, target,
1833                                             mode, tmode, reverse, false);
1834           if (result)
1835             return result;
1836           delete_insns_since (last);
1837         }
1838     }
1839
1840   if (!fallback_p)
1841     return NULL;
1842
1843   /* Find a correspondingly-sized integer field, so we can apply
1844      shifts and masks to it.  */
1845   int_mode = int_mode_for_mode (tmode);
1846   if (int_mode == BLKmode)
1847     int_mode = int_mode_for_mode (mode);
1848   /* Should probably push op0 out to memory and then do a load.  */
1849   gcc_assert (int_mode != BLKmode);
1850
1851   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target,
1852                                     unsignedp, reverse);
1853
1854   /* Complex values must be reversed piecewise, so we need to undo the global
1855      reversal, convert to the complex mode and reverse again.  */
1856   if (reverse && COMPLEX_MODE_P (tmode))
1857     {
1858       target = flip_storage_order (int_mode, target);
1859       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1860       target = flip_storage_order (tmode, target);
1861     }
1862   else
1863     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1864
1865   return target;
1866 }
1867
1868 /* Expand a read of the bit-field of STR_RTX (a REG or MEM) that is
1869    BITSIZE bits wide and starts at bit BITNUM, and return the rtx that
1870    holds the extracted value.
1871
1872    TARGET, if nonzero, is a suggested place for the result; it is used
1873    only when storing there costs nothing extra, in which case TARGET
1874    itself is returned.  Otherwise the result is a REG of mode TMODE or
1875    MODE, with TMODE preferred if they are equally easy.
1876
1877    UNSIGNEDP is nonzero if this is an unsigned bit field.
1878    MODE is the natural mode of the field value once extracted.
1879    TMODE is the mode the caller would like the value to have, although
1880    the value may be returned in MODE instead.
1881    If REVERSE is true, the extraction is to be done in reverse order.  */
1882
1883 rtx
1884 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1885                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1886                    machine_mode mode, machine_mode tmode, bool reverse)
1887 {
1888   /* Pick the mode in which a strict volatile access would be made:
1889      that of STR_RTX if it has a size, else that of TARGET if it has
1890      one, else TMODE.  */
1891   machine_mode access_mode = tmode;
1892   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1893     access_mode = GET_MODE (str_rtx);
1894   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1895     access_mode = GET_MODE (target);
1896
1897   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1898   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, access_mode,
1899                                   0, 0))
1900     {
1901       if (bitsize != GET_MODE_BITSIZE (access_mode))
1902         {
1903           str_rtx = narrow_bit_field_mem (str_rtx, access_mode, bitsize,
1904                                           bitnum, &bitnum);
1905           gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (access_mode));
1906           str_rtx = copy_to_reg (str_rtx);
1907         }
1908       else
1909         {
1910           /* The field is a whole ACCESS_MODE value, so a simple load
1911              does the job.  We know here that the field can be accessed
1912              with one single instruction; on targets that support
1913              unaligned memory, that access may be unaligned.  */
1914           rtx loaded = adjust_bitfield_address (str_rtx, access_mode,
1915                                                 bitnum / BITS_PER_UNIT);
1916           if (reverse)
1917             loaded = flip_storage_order (access_mode, loaded);
1918           gcc_assert (bitnum % BITS_PER_UNIT == 0);
1919           return convert_extracted_bit_field (loaded, mode, tmode, unsignedp);
1920         }
1921     }
1922
1923   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, target,
1924                               mode, tmode, reverse, true);
1925 }
1926 \f
1927 /* Extract the BITSIZE-bit field that starts at bit BITNUM of OP0 using
1928    shifts and boolean operations.
1929
1930    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1931    If REVERSE is true, the extraction is to be done in reverse order.
1932
1933    If TARGET is nonzero, attempts to store the value there
1934    and return TARGET, but this is not guaranteed.
1935    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1936
1937 static rtx
1938 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1939                          unsigned HOST_WIDE_INT bitsize,
1940                          unsigned HOST_WIDE_INT bitnum, rtx target,
1941                          int unsignedp, bool reverse)
1942 {
1943   /* A non-memory operand is extracted in its own mode as it stands.  */
1944   if (!MEM_P (op0))
1945     return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1946                                       target, unsignedp, reverse);
1947
1948   machine_mode access_mode
1949     = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1950                      MEM_VOLATILE_P (op0));
1951
1952   /* The only way no mode should be found is if the field spans word
1953      boundaries.  */
1954   if (access_mode == VOIDmode)
1955     return extract_split_bit_field (op0, bitsize, bitnum, unsignedp, reverse);
1956
1957   op0 = narrow_bit_field_mem (op0, access_mode, bitsize, bitnum, &bitnum);
1958   return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1959                                     target, unsignedp, reverse);
1960 }
1961
1962 /* Helper function for extract_fixed_bit_field, extracts
1963    the bit field always using the MODE of OP0.

        OP0 must have a scalar integer mode (asserted below).  BITSIZE and
        BITNUM give the width and position of the field within OP0, TARGET
        is a suggested destination, and UNSIGNEDP selects zero- rather than
        sign-extension.  If REVERSE is true, OP0 is passed through
        flip_storage_order before the field is isolated.

        An unsigned field is isolated with a logical right shift and an
        AND mask; a signed field with a left shift followed by an
        arithmetic right shift.  */
1964
1965 static rtx
1966 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1967                            unsigned HOST_WIDE_INT bitsize,
1968                            unsigned HOST_WIDE_INT bitnum, rtx target,
1969                            int unsignedp, bool reverse)
1970 {
1971   machine_mode mode = GET_MODE (op0);
1972   gcc_assert (SCALAR_INT_MODE_P (mode));
1973
1974   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1975      for invalid input, such as extract equivalent of f5 from
1976      gcc.dg/pr48335-2.c.  */
1977
1978   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1979     /* BITNUM is the distance between our msb and that of OP0.
1980        Convert it to the distance from the lsb.  */
1981     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1982
1983   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1984      We have reduced the big-endian case to the little-endian case.  */
1985   if (reverse)
1986     op0 = flip_storage_order (mode, op0);
1987
1988   if (unsignedp)
1989     {
1990       if (bitnum)
1991         {
1992           /* If the field does not already start at the lsb,
1993              shift it so it does.  */
1994           /* Maybe propagate the target for the shift.  */
1995           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
               /* The shift is done in MODE, so TARGET is only offered to it
                  when TMODE is that same mode.  */
1996           if (tmode != mode)
1997             subtarget = 0;
1998           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1999         }
2000       /* Convert the value to the desired mode.  */
2001       if (mode != tmode)
2002         op0 = convert_to_mode (tmode, op0, 1);
2003
2004       /* Unless the msb of the field used to be the msb when we shifted,
2005          mask out the upper bits.  */
2006
2007       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2008         return expand_binop (GET_MODE (op0), and_optab, op0,
2009                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
2010                              target, 1, OPTAB_LIB_WIDEN);
2011       return op0;
2012     }
2013
2014   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2015      then arithmetic-shift its lsb to the lsb of the word.  */
2016   op0 = force_reg (mode, op0);
2017
2018   /* Find the narrowest integer mode that contains the field.  */
2019
       /* MODE is reused as the loop variable: from here on it names the mode
          in which the shifts are done, not the original mode of OP0.  */
2020   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
2021        mode = GET_MODE_WIDER_MODE (mode))
2022     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
2023       {
2024         op0 = convert_to_mode (mode, op0, 0);
2025         break;
2026       }
2027
       /* TARGET is only offered to the shifts when they are done in TMODE.  */
2028   if (mode != tmode)
2029     target = 0;
2030
2031   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2032     {
2033       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2034       /* Maybe propagate the target for the shift.  */
2035       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2036       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2037     }
2038
       /* The last argument is 0 here, unlike the shifts above; presumably
          this requests the arithmetic (sign-propagating) form of the right
          shift, as the comment before the signed path describes.  */
2039   return expand_shift (RSHIFT_EXPR, mode, op0,
2040                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2041 }
2042
2043 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2044    VALUE << BITPOS.

        The shift itself is computed by wi::lshift and the result is turned
        into an rtx of mode MODE by immed_wide_int_const.  */
2045
2046 static rtx
2047 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2048               int bitpos)
2049 {
2050   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2051 }
2052 \f
2053 /* Extract a bit field that is split across two words by reading it
2054    piece by piece and combining the pieces with bitwise or.  Return
2055    an RTX for the result, which is computed in word_mode.
2056
2057    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2058    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2059    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2060
2061    If REVERSE is true, the extraction is to be done in reverse order.  */
2062
2063 static rtx
2064 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2065                          unsigned HOST_WIDE_INT bitpos, int unsignedp,
2066                          bool reverse)
2067 {
2068   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2069      much at a time.  For a MEM it is also capped by the alignment.  */
2070   const unsigned int unit
2071     = (REG_P (op0) || GET_CODE (op0) == SUBREG
2072        ? BITS_PER_WORD
2073        : MIN (MEM_ALIGN (op0), BITS_PER_WORD));
2074
2075   /* True if the first piece extracted is the most significant one.  */
2076   const bool msb_first = reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN;
2077
2078   /* BITSDONE is the number of field bits extracted so far.  */
2079   unsigned int bitsdone = 0;
2080   rtx result = NULL_RTX;
2081   bool first = true;
2082
2083   while (bitsdone < bitsize)
2084     {
2085       /* Locate the next piece: OFFSET counts whole UNITs from the start
2086          of OP0 and THISPOS is the bit position within that UNIT.  */
2087       const unsigned HOST_WIDE_INT start = bitpos + bitsdone;
2088       unsigned HOST_WIDE_INT offset = start / unit;
2089       const unsigned HOST_WIDE_INT thispos = start % unit;
2090
2091       /* THISSIZE must not overrun a word boundary.  Otherwise,
2092          extract_fixed_bit_field will call us again, and we will mutually
2093          recurse forever.  */
2094       unsigned HOST_WIDE_INT thissize
2095         = MIN (bitsize - bitsdone, BITS_PER_WORD);
2096       thissize = MIN (thissize, unit - thispos);
2097
2098       /* If OP0 is a register, then handle OFFSET here.  */
2099       rtx word = op0;
2100       if (SUBREG_P (op0) || REG_P (op0))
2101         {
2102           word = operand_subword_force (op0, offset, GET_MODE (op0));
2103           offset = 0;
2104         }
2105
2106       /* Extract the parts in bit-counting order.
2107          OFFSET is in UNITs, and UNIT is in bits.  */
2108       rtx part = extract_fixed_bit_field (word_mode, word, thissize,
2109                                           offset * unit + thispos, 0, 1,
2110                                           reverse);
2111       bitsdone += thissize;
2112
2113       /* Shift this part into place for the result: by the number of bits
2114          still to come when MSB_FIRST, otherwise by the number of bits
2115          already extracted before this part.  */
2116       const unsigned HOST_WIDE_INT shift
2117         = msb_first ? bitsize - bitsdone : bitsdone - thissize;
2118       if (shift != 0)
2119         part = expand_shift (LSHIFT_EXPR, word_mode, part, shift, 0, 1);
2120
2121       /* Combine the parts with bitwise or.  This works
2122          because we extracted each part as an unsigned bit field.  */
2123       if (first)
2124         result = part;
2125       else
2126         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2127                                OPTAB_LIB_WIDEN);
2128       first = false;
2129     }
2130
2131   /* Signed bit field: sign-extend with two arithmetic shifts, first moving
2132      the field's msb to the msb of the word and then back down.  */
2133   if (!unsignedp)
2134     {
2135       result = expand_shift (LSHIFT_EXPR, word_mode, result,
2136                              BITS_PER_WORD - bitsize, NULL_RTX, 0);
2137       result = expand_shift (RSHIFT_EXPR, word_mode, result,
2138                              BITS_PER_WORD - bitsize, NULL_RTX, 0);
2139     }
2140
2141   /* For an unsigned bit field the combined pieces are already the
2142      answer.  */
2143   return result;
2144 }
2145 \f
2146 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2147    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2148    MODE, fill the upper bits with zeros.  Return the value on success.
2149    Return null if the layout of either mode is unknown (as for CC modes)
2150    or if the extraction would involve unprofitable mode punning.
2151
2152    This is different from gen_lowpart* in these respects:
2153
2154      - the returned value must always be considered an rvalue
2155
2156      - when MODE is wider than SRC_MODE, the extraction involves
2157        a zero extension
2158
2159      - when MODE is smaller than SRC_MODE, the extraction involves
2160        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2161
2162    In other words, this routine performs a computation, whereas the
2163    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2164    operations.  */
2165
2166 rtx
2167 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2168 {
2169   /* Nothing to do when the modes already agree.  */
2170   if (mode == src_mode)
2171     return src;
2172
2173   if (CONSTANT_P (src))
2174     {
2175       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2176          fails, it will happily create (subreg (symbol_ref)) or similar
2177          invalid SUBREGs.  */
2178       unsigned int lowpart_byte = subreg_lowpart_offset (mode, src_mode);
2179       rtx folded = simplify_subreg (mode, src, src_mode, lowpart_byte);
2180       if (folded)
2181         return folded;
2182
2183       /* Otherwise wrap a register copy of the constant in a SUBREG,
2184          provided the constant has a mode and the SUBREG is valid.  */
2185       if (GET_MODE (src) != VOIDmode
2186           && validate_subreg (mode, src_mode, src, lowpart_byte))
2187         {
2188           src = force_reg (GET_MODE (src), src);
2189           return gen_rtx_SUBREG (mode, src, lowpart_byte);
2190         }
2191       return NULL_RTX;
2192     }
2193
2194   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2195     return NULL_RTX;
2196
2197   /* Same-sized tieable modes may be handled by a plain lowpart.  */
2198   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2199       && MODES_TIEABLE_P (mode, src_mode))
2200     {
2201       rtx lowpart = gen_lowpart_common (mode, src);
2202       if (lowpart)
2203         return lowpart;
2204     }
2205
2206   /* Otherwise go via the integer modes int_mode_for_mode gives for each.  */
2207   machine_mode src_int_mode = int_mode_for_mode (src_mode);
2208   machine_mode int_mode = int_mode_for_mode (mode);
2209   if (src_int_mode == BLKmode || int_mode == BLKmode)
2210     return NULL_RTX;
2211   if (!MODES_TIEABLE_P (src_int_mode, src_mode)
2212       || !MODES_TIEABLE_P (int_mode, mode))
2213     return NULL_RTX;
2214
2215   rtx src_int = gen_lowpart (src_int_mode, src);
2216   rtx dst_int = convert_modes (int_mode, src_int_mode, src_int, true);
2217   return gen_lowpart (mode, dst_int);
2218 }
2219 \f
2220 /* Emit code to add INC to TARGET, leaving the sum in TARGET.  */
2221
2222 void
2223 expand_inc (rtx target, rtx inc)
2224 {
2225   rtx sum = expand_binop (GET_MODE (target), add_optab, target, inc,
2226                           target, 0, OPTAB_LIB_WIDEN);
2227   if (sum == target)
2228     return;
2229   emit_move_insn (target, sum);
2230 }
2231
2232 /* Subtract DEC from TARGET, leaving the difference in TARGET.  */
2233
2234 void
2235 expand_dec (rtx target, rtx dec)
2236 {
2237   rtx diff = expand_binop (GET_MODE (target), sub_optab, target, dec,
2238                            target, 0, OPTAB_LIB_WIDEN);
2239   if (diff == target)
2240     return;
2241   emit_move_insn (target, diff);
2242 }
2243 \f
2244 /* Output a shift instruction for expression code CODE,
2245    with SHIFTED being the rtx for the value to shift,
2246    and AMOUNT the rtx for the amount to shift by.
2247    Store the result in the rtx TARGET, if that is convenient.
2248    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2249    Return the rtx for where the value is.  */
2250
2251 static rtx
2252 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2253                 rtx amount, rtx target, int unsignedp)
2254 {
2255   rtx op1, temp = 0;
2256   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2257   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2258   optab lshift_optab = ashl_optab;
2259   optab rshift_arith_optab = ashr_optab;
2260   optab rshift_uns_optab = lshr_optab;
2261   optab lrotate_optab = rotl_optab;
2262   optab rrotate_optab = rotr_optab;
2263   machine_mode op1_mode;
2264   machine_mode scalar_mode = mode;
2265   int attempt;
2266   bool speed = optimize_insn_for_speed_p ();
2267
2268   if (VECTOR_MODE_P (mode))
2269     scalar_mode = GET_MODE_INNER (mode);
2270   op1 = amount;
2271   op1_mode = GET_MODE (op1);
2272
2273   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2274      shift amount is a vector, use the vector/vector shift patterns.  */
2275   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2276     {
2277       lshift_optab = vashl_optab;
2278       rshift_arith_optab = vashr_optab;
2279       rshift_uns_optab = vlshr_optab;
2280       lrotate_optab = vrotl_optab;
2281       rrotate_optab = vrotr_optab;
2282     }
2283
2284   /* Previously detected shift-counts computed by NEGATE_EXPR
2285      and shifted in the other direction; but that does not work
2286      on all machines.  */
2287
2288   if (SHIFT_COUNT_TRUNCATED)
2289     {
2290       if (CONST_INT_P (op1)
2291           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2292               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2293         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2294                        % GET_MODE_BITSIZE (scalar_mode));
2295       else if (GET_CODE (op1) == SUBREG
2296                && subreg_lowpart_p (op1)
2297                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2298                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2299         op1 = SUBREG_REG (op1);
2300     }
2301
2302   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2303      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2304      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2305      amount instead.  */
2306   if (rotate
2307       && CONST_INT_P (op1)
2308       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2309                    GET_MODE_BITSIZE (scalar_mode) - 1))
2310     {
2311       op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2312       left = !left;
2313       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2314     }
2315
2316   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2317      Note that this is not the case for bigger values.  For instance a rotation
2318      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2319      0x04030201 (bswapsi).  */
2320   if (rotate
2321       && CONST_INT_P (op1)
2322       && INTVAL (op1) == BITS_PER_UNIT
2323       && GET_MODE_SIZE (scalar_mode) == 2
2324       && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2325     return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2326                                   unsignedp);
2327
2328   if (op1 == const0_rtx)
2329     return shifted;
2330
2331   /* Check whether it's cheaper to implement a left shift by a constant
2332      bit count by a sequence of additions.  */
2333   if (code == LSHIFT_EXPR
2334       && CONST_INT_P (op1)
2335       && INTVAL (op1) > 0
2336       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2337       && INTVAL (op1) < MAX_BITS_PER_WORD
2338       && (shift_cost (speed, mode, INTVAL (op1))
2339           > INTVAL (op1) * add_cost (speed, mode))
2340       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2341     {
2342       int i;
2343       for (i = 0; i < INTVAL (op1); i++)
2344         {
2345           temp = force_reg (mode, shifted);
2346           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2347                                   unsignedp, OPTAB_LIB_WIDEN);
2348         }
2349       return shifted;
2350     }
2351
2352   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2353     {
2354       enum optab_methods methods;
2355
2356       if (attempt == 0)
2357         methods = OPTAB_DIRECT;
2358       else if (attempt == 1)
2359         methods = OPTAB_WIDEN;
2360       else
2361         methods = OPTAB_LIB_WIDEN;
2362
2363       if (rotate)
2364         {
2365           /* Widening does not work for rotation.  */
2366           if (methods == OPTAB_WIDEN)
2367             continue;
2368           else if (methods == OPTAB_LIB_WIDEN)
2369             {
2370               /* If we have been unable to open-code this by a rotation,
2371                  do it as the IOR of two shifts.  I.e., to rotate A
2372                  by N bits, compute
2373                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2374                  where C is the bitsize of A.
2375
2376                  It is theoretically possible that the target machine might
2377                  not be able to perform either shift and hence we would
2378                  be making two libcalls rather than just the one for the
2379                  shift (similarly if IOR could not be done).  We will allow
2380                  this extremely unlikely lossage to avoid complicating the
2381                  code below.  */
2382
2383               rtx subtarget = target == shifted ? 0 : target;
2384               rtx new_amount, other_amount;
2385               rtx temp1;
2386
2387               new_amount = op1;
2388               if (op1 == const0_rtx)
2389                 return shifted;
2390               else if (CONST_INT_P (op1))
2391                 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2392                                         - INTVAL (op1));
2393               else
2394                 {
2395                   other_amount
2396                     = simplify_gen_unary (NEG, GET_MODE (op1),
2397                                           op1, GET_MODE (op1));
2398                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2399                   other_amount
2400                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2401                                            gen_int_mode (mask, GET_MODE (op1)));
2402                 }
2403
2404               shifted = force_reg (mode, shifted);
2405
2406               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2407                                      mode, shifted, new_amount, 0, 1);
2408               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2409                                       mode, shifted, other_amount,
2410                                       subtarget, 1);
2411               return expand_binop (mode, ior_optab, temp, temp1, target,
2412                                    unsignedp, methods);
2413             }
2414
2415           temp = expand_binop (mode,
2416                                left ? lrotate_optab : rrotate_optab,
2417                                shifted, op1, target, unsignedp, methods);
2418         }
2419       else if (unsignedp)
2420         temp = expand_binop (mode,
2421                              left ? lshift_optab : rshift_uns_optab,
2422                              shifted, op1, target, unsignedp, methods);
2423
2424       /* Do arithmetic shifts.
2425          Also, if we are going to widen the operand, we can just as well
2426          use an arithmetic right-shift instead of a logical one.  */
2427       if (temp == 0 && ! rotate
2428           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2429         {
2430           enum optab_methods methods1 = methods;
2431
2432           /* If trying to widen a log shift to an arithmetic shift,
2433              don't accept an arithmetic shift of the same size.  */
2434           if (unsignedp)
2435             methods1 = OPTAB_MUST_WIDEN;
2436
2437           /* Arithmetic shift */
2438
2439           temp = expand_binop (mode,
2440                                left ? lshift_optab : rshift_arith_optab,
2441                                shifted, op1, target, unsignedp, methods1);
2442         }
2443
2444       /* We used to try extzv here for logical right shifts, but that was
2445          only useful for one machine, the VAX, and caused poor code
2446          generation there for lshrdi3, so the code was deleted and a
2447          define_expand for lshrsi3 was added to vax.md.  */
2448     }
2449
2450   gcc_assert (temp);
2451   return temp;
2452 }
2453
2454 /* Output a shift instruction for expression code CODE,
2455    with SHIFTED being the rtx for the value to shift,
2456    and AMOUNT the amount to shift by.
2457    Store the result in the rtx TARGET, if that is convenient.
2458    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2459    Return the rtx for where the value is.  */
2460
2461 rtx
2462 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2463               int amount, rtx target, int unsignedp)
2464 {
2465   return expand_shift_1 (code, mode,
2466                          shifted, GEN_INT (amount), target, unsignedp);
2467 }
2468
2469 /* Output a shift instruction for expression code CODE,
2470    with SHIFTED being the rtx for the value to shift,
2471    and AMOUNT the tree for the amount to shift by.
2472    Store the result in the rtx TARGET, if that is convenient.
2473    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2474    Return the rtx for where the value is.  */
2475
2476 rtx
2477 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2478                        tree amount, rtx target, int unsignedp)
2479 {
2480   return expand_shift_1 (code, mode,
2481                          shifted, expand_normal (amount), target, unsignedp);
2482 }
2483
2484 \f
2485 /* Indicates the type of fixup needed after a constant multiplication.
2486    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2487    the result should be negated, and ADD_VARIANT means that the
2488    multiplicand should be added to the result.  */
2489 enum mult_variant {basic_variant, negate_variant, add_variant};
2490
2491 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2492                         const struct mult_cost *, machine_mode mode);
2493 static bool choose_mult_variant (machine_mode, HOST_WIDE_INT,
2494                                  struct algorithm *, enum mult_variant *, int);
2495 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2496                               const struct algorithm *, enum mult_variant);
2497 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2498 static rtx extract_high_half (machine_mode, rtx);
2499 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2500 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2501                                        int, int);
2502 /* Compute and return the best algorithm for multiplying by T.
2503    The algorithm must cost less than cost_limit
2504    If retval.cost >= COST_LIMIT, no algorithm was found and all
2505    other field of the returned struct are undefined.
2506    MODE is the machine mode of the multiplication.  */
2507
2508 static void
2509 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2510             const struct mult_cost *cost_limit, machine_mode mode)
2511 {
2512   int m;
2513   struct algorithm *alg_in, *best_alg;
2514   struct mult_cost best_cost;
2515   struct mult_cost new_limit;
2516   int op_cost, op_latency;
2517   unsigned HOST_WIDE_INT orig_t = t;
2518   unsigned HOST_WIDE_INT q;
2519   int maxm, hash_index;
2520   bool cache_hit = false;
2521   enum alg_code cache_alg = alg_zero;
2522   bool speed = optimize_insn_for_speed_p ();
2523   machine_mode imode;
2524   struct alg_hash_entry *entry_ptr;
2525
2526   /* Indicate that no algorithm is yet found.  If no algorithm
2527      is found, this value will be returned and indicate failure.  */
2528   alg_out->cost.cost = cost_limit->cost + 1;
2529   alg_out->cost.latency = cost_limit->latency + 1;
2530
2531   if (cost_limit->cost < 0
2532       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2533     return;
2534
2535   /* Be prepared for vector modes.  */
2536   imode = GET_MODE_INNER (mode);
2537
2538   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2539
2540   /* Restrict the bits of "t" to the multiplication's mode.  */
2541   t &= GET_MODE_MASK (imode);
2542
2543   /* t == 1 can be done in zero cost.  */
2544   if (t == 1)
2545     {
2546       alg_out->ops = 1;
2547       alg_out->cost.cost = 0;
2548       alg_out->cost.latency = 0;
2549       alg_out->op[0] = alg_m;
2550       return;
2551     }
2552
2553   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2554      fail now.  */
2555   if (t == 0)
2556     {
2557       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2558         return;
2559       else
2560         {
2561           alg_out->ops = 1;
2562           alg_out->cost.cost = zero_cost (speed);
2563           alg_out->cost.latency = zero_cost (speed);
2564           alg_out->op[0] = alg_zero;
2565           return;
2566         }
2567     }
2568
2569   /* We'll be needing a couple extra algorithm structures now.  */
2570
2571   alg_in = XALLOCA (struct algorithm);
2572   best_alg = XALLOCA (struct algorithm);
2573   best_cost = *cost_limit;
2574
2575   /* Compute the hash index.  */
2576   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2577
2578   /* See if we already know what to do for T.  */
2579   entry_ptr = alg_hash_entry_ptr (hash_index);
2580   if (entry_ptr->t == t
2581       && entry_ptr->mode == mode
2582       && entry_ptr->mode == mode
2583       && entry_ptr->speed == speed
2584       && entry_ptr->alg != alg_unknown)
2585     {
2586       cache_alg = entry_ptr->alg;
2587
2588       if (cache_alg == alg_impossible)
2589         {
2590           /* The cache tells us that it's impossible to synthesize
2591              multiplication by T within entry_ptr->cost.  */
2592           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2593             /* COST_LIMIT is at least as restrictive as the one
2594                recorded in the hash table, in which case we have no
2595                hope of synthesizing a multiplication.  Just
2596                return.  */
2597             return;
2598
2599           /* If we get here, COST_LIMIT is less restrictive than the
2600              one recorded in the hash table, so we may be able to
2601              synthesize a multiplication.  Proceed as if we didn't
2602              have the cache entry.  */
2603         }
2604       else
2605         {
2606           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2607             /* The cached algorithm shows that this multiplication
2608                requires more cost than COST_LIMIT.  Just return.  This
2609                way, we don't clobber this cache entry with
2610                alg_impossible but retain useful information.  */
2611             return;
2612
2613           cache_hit = true;
2614
2615           switch (cache_alg)
2616             {
2617             case alg_shift:
2618               goto do_alg_shift;
2619
2620             case alg_add_t_m2:
2621             case alg_sub_t_m2:
2622               goto do_alg_addsub_t_m2;
2623
2624             case alg_add_factor:
2625             case alg_sub_factor:
2626               goto do_alg_addsub_factor;
2627
2628             case alg_add_t2_m:
2629               goto do_alg_add_t2_m;
2630
2631             case alg_sub_t2_m:
2632               goto do_alg_sub_t2_m;
2633
2634             default:
2635               gcc_unreachable ();
2636             }
2637         }
2638     }
2639
2640   /* If we have a group of zero bits at the low-order part of T, try
2641      multiplying by the remaining bits and then doing a shift.  */
2642
2643   if ((t & 1) == 0)
2644     {
2645     do_alg_shift:
2646       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2647       if (m < maxm)
2648         {
2649           q = t >> m;
2650           /* The function expand_shift will choose between a shift and
2651              a sequence of additions, so the observed cost is given as
2652              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2653           op_cost = m * add_cost (speed, mode);
2654           if (shift_cost (speed, mode, m) < op_cost)
2655             op_cost = shift_cost (speed, mode, m);
2656           new_limit.cost = best_cost.cost - op_cost;
2657           new_limit.latency = best_cost.latency - op_cost;
2658           synth_mult (alg_in, q, &new_limit, mode);
2659
2660           alg_in->cost.cost += op_cost;
2661           alg_in->cost.latency += op_cost;
2662           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2663             {
2664               best_cost = alg_in->cost;
2665               std::swap (alg_in, best_alg);
2666               best_alg->log[best_alg->ops] = m;
2667               best_alg->op[best_alg->ops] = alg_shift;
2668             }
2669
2670           /* See if treating ORIG_T as a signed number yields a better
2671              sequence.  Try this sequence only for a negative ORIG_T
2672              as it would be useless for a non-negative ORIG_T.  */
2673           if ((HOST_WIDE_INT) orig_t < 0)
2674             {
2675               /* Shift ORIG_T as follows because a right shift of a
2676                  negative-valued signed type is implementation
2677                  defined.  */
2678               q = ~(~orig_t >> m);
2679               /* The function expand_shift will choose between a shift
2680                  and a sequence of additions, so the observed cost is
2681                  given as MIN (m * add_cost(speed, mode),
2682                  shift_cost(speed, mode, m)).  */
2683               op_cost = m * add_cost (speed, mode);
2684               if (shift_cost (speed, mode, m) < op_cost)
2685                 op_cost = shift_cost (speed, mode, m);
2686               new_limit.cost = best_cost.cost - op_cost;
2687               new_limit.latency = best_cost.latency - op_cost;
2688               synth_mult (alg_in, q, &new_limit, mode);
2689
2690               alg_in->cost.cost += op_cost;
2691               alg_in->cost.latency += op_cost;
2692               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2693                 {
2694                   best_cost = alg_in->cost;
2695                   std::swap (alg_in, best_alg);
2696                   best_alg->log[best_alg->ops] = m;
2697                   best_alg->op[best_alg->ops] = alg_shift;
2698                 }
2699             }
2700         }
2701       if (cache_hit)
2702         goto done;
2703     }
2704
2705   /* If we have an odd number, add or subtract one.  */
2706   if ((t & 1) != 0)
2707     {
2708       unsigned HOST_WIDE_INT w;
2709
2710     do_alg_addsub_t_m2:
2711       for (w = 1; (w & t) != 0; w <<= 1)
2712         ;
2713       /* If T was -1, then W will be zero after the loop.  This is another
2714          case where T ends with ...111.  Handling this with (T + 1) and
2715          subtract 1 produces slightly better code and results in algorithm
2716          selection much faster than treating it like the ...0111 case
2717          below.  */
2718       if (w == 0
2719           || (w > 2
2720               /* Reject the case where t is 3.
2721                  Thus we prefer addition in that case.  */
2722               && t != 3))
2723         {
2724           /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */
2725
2726           op_cost = add_cost (speed, mode);
2727           new_limit.cost = best_cost.cost - op_cost;
2728           new_limit.latency = best_cost.latency - op_cost;
2729           synth_mult (alg_in, t + 1, &new_limit, mode);
2730
2731           alg_in->cost.cost += op_cost;
2732           alg_in->cost.latency += op_cost;
2733           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2734             {
2735               best_cost = alg_in->cost;
2736               std::swap (alg_in, best_alg);
2737               best_alg->log[best_alg->ops] = 0;
2738               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2739             }
2740         }
2741       else
2742         {
2743           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */
2744
2745           op_cost = add_cost (speed, mode);
2746           new_limit.cost = best_cost.cost - op_cost;
2747           new_limit.latency = best_cost.latency - op_cost;
2748           synth_mult (alg_in, t - 1, &new_limit, mode);
2749
2750           alg_in->cost.cost += op_cost;
2751           alg_in->cost.latency += op_cost;
2752           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2753             {
2754               best_cost = alg_in->cost;
2755               std::swap (alg_in, best_alg);
2756               best_alg->log[best_alg->ops] = 0;
2757               best_alg->op[best_alg->ops] = alg_add_t_m2;
2758             }
2759         }
2760
2761       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2762          quickly with a - a * n for some appropriate constant n.  */
2763       m = exact_log2 (-orig_t + 1);
2764       if (m >= 0 && m < maxm)
2765         {
2766           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2767           /* If the target has a cheap shift-and-subtract insn use
2768              that in preference to a shift insn followed by a sub insn.
2769              Assume that the shift-and-sub is "atomic" with a latency
2770              equal to it's cost, otherwise assume that on superscalar
2771              hardware the shift may be executed concurrently with the
2772              earlier steps in the algorithm.  */
2773           if (shiftsub1_cost (speed, mode, m) <= op_cost)
2774             {
2775               op_cost = shiftsub1_cost (speed, mode, m);
2776               op_latency = op_cost;
2777             }
2778           else
2779             op_latency = add_cost (speed, mode);
2780
2781           new_limit.cost = best_cost.cost - op_cost;
2782           new_limit.latency = best_cost.latency - op_latency;
2783           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2784                       &new_limit, mode);
2785
2786           alg_in->cost.cost += op_cost;
2787           alg_in->cost.latency += op_latency;
2788           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2789             {
2790               best_cost = alg_in->cost;
2791               std::swap (alg_in, best_alg);
2792               best_alg->log[best_alg->ops] = m;
2793               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2794             }
2795         }
2796
2797       if (cache_hit)
2798         goto done;
2799     }
2800
2801   /* Look for factors of t of the form
2802      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2803      If we find such a factor, we can multiply by t using an algorithm that
2804      multiplies by q, shift the result by m and add/subtract it to itself.
2805
2806      We search for large factors first and loop down, even if large factors
2807      are less probable than small; if we find a large factor we will find a
2808      good sequence quickly, and therefore be able to prune (by decreasing
2809      COST_LIMIT) the search.  */
2810
2811  do_alg_addsub_factor:
2812   for (m = floor_log2 (t - 1); m >= 2; m--)
2813     {
2814       unsigned HOST_WIDE_INT d;
2815
2816       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2817       if (t % d == 0 && t > d && m < maxm
2818           && (!cache_hit || cache_alg == alg_add_factor))
2819         {
2820           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2821           if (shiftadd_cost (speed, mode, m) <= op_cost)
2822             op_cost = shiftadd_cost (speed, mode, m);
2823
2824           op_latency = op_cost;
2825
2826
2827           new_limit.cost = best_cost.cost - op_cost;
2828           new_limit.latency = best_cost.latency - op_latency;
2829           synth_mult (alg_in, t / d, &new_limit, mode);
2830
2831           alg_in->cost.cost += op_cost;
2832           alg_in->cost.latency += op_latency;
2833           if (alg_in->cost.latency < op_cost)
2834             alg_in->cost.latency = op_cost;
2835           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2836             {
2837               best_cost = alg_in->cost;
2838               std::swap (alg_in, best_alg);
2839               best_alg->log[best_alg->ops] = m;
2840               best_alg->op[best_alg->ops] = alg_add_factor;
2841             }
2842           /* Other factors will have been taken care of in the recursion.  */
2843           break;
2844         }
2845
2846       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2847       if (t % d == 0 && t > d && m < maxm
2848           && (!cache_hit || cache_alg == alg_sub_factor))
2849         {
2850           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2851           if (shiftsub0_cost (speed, mode, m) <= op_cost)
2852             op_cost = shiftsub0_cost (speed, mode, m);
2853
2854           op_latency = op_cost;
2855
2856           new_limit.cost = best_cost.cost - op_cost;
2857           new_limit.latency = best_cost.latency - op_latency;
2858           synth_mult (alg_in, t / d, &new_limit, mode);
2859
2860           alg_in->cost.cost += op_cost;
2861           alg_in->cost.latency += op_latency;
2862           if (alg_in->cost.latency < op_cost)
2863             alg_in->cost.latency = op_cost;
2864           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2865             {
2866               best_cost = alg_in->cost;
2867               std::swap (alg_in, best_alg);
2868               best_alg->log[best_alg->ops] = m;
2869               best_alg->op[best_alg->ops] = alg_sub_factor;
2870             }
2871           break;
2872         }
2873     }
2874   if (cache_hit)
2875     goto done;
2876
2877   /* Try shift-and-add (load effective address) instructions,
2878      i.e. do a*3, a*5, a*9.  */
2879   if ((t & 1) != 0)
2880     {
2881     do_alg_add_t2_m:
2882       q = t - 1;
2883       q = q & -q;
2884       m = exact_log2 (q);
2885       if (m >= 0 && m < maxm)
2886         {
2887           op_cost = shiftadd_cost (speed, mode, m);
2888           new_limit.cost = best_cost.cost - op_cost;
2889           new_limit.latency = best_cost.latency - op_cost;
2890           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2891
2892           alg_in->cost.cost += op_cost;
2893           alg_in->cost.latency += op_cost;
2894           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2895             {
2896               best_cost = alg_in->cost;
2897               std::swap (alg_in, best_alg);
2898               best_alg->log[best_alg->ops] = m;
2899               best_alg->op[best_alg->ops] = alg_add_t2_m;
2900             }
2901         }
2902       if (cache_hit)
2903         goto done;
2904
2905     do_alg_sub_t2_m:
2906       q = t + 1;
2907       q = q & -q;
2908       m = exact_log2 (q);
2909       if (m >= 0 && m < maxm)
2910         {
2911           op_cost = shiftsub0_cost (speed, mode, m);
2912           new_limit.cost = best_cost.cost - op_cost;
2913           new_limit.latency = best_cost.latency - op_cost;
2914           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2915
2916           alg_in->cost.cost += op_cost;
2917           alg_in->cost.latency += op_cost;
2918           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2919             {
2920               best_cost = alg_in->cost;
2921               std::swap (alg_in, best_alg);
2922               best_alg->log[best_alg->ops] = m;
2923               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2924             }
2925         }
2926       if (cache_hit)
2927         goto done;
2928     }
2929
2930  done:
2931   /* If best_cost has not decreased, we have not found any algorithm.  */
2932   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2933     {
2934       /* We failed to find an algorithm.  Record alg_impossible for
2935          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2936          we are asked to find an algorithm for T within the same or
2937          lower COST_LIMIT, we can immediately return to the
2938          caller.  */
2939       entry_ptr->t = t;
2940       entry_ptr->mode = mode;
2941       entry_ptr->speed = speed;
2942       entry_ptr->alg = alg_impossible;
2943       entry_ptr->cost = *cost_limit;
2944       return;
2945     }
2946
2947   /* Cache the result.  */
2948   if (!cache_hit)
2949     {
2950       entry_ptr->t = t;
2951       entry_ptr->mode = mode;
2952       entry_ptr->speed = speed;
2953       entry_ptr->alg = best_alg->op[best_alg->ops];
2954       entry_ptr->cost.cost = best_cost.cost;
2955       entry_ptr->cost.latency = best_cost.latency;
2956     }
2957
2958   /* If we are getting a too long sequence for `struct algorithm'
2959      to record, make this search fail.  */
2960   if (best_alg->ops == MAX_BITS_PER_WORD)
2961     return;
2962
2963   /* Copy the algorithm from temporary space to the space at alg_out.
2964      We avoid using structure assignment because the majority of
2965      best_alg is normally undefined, and this is a critical function.  */
2966   alg_out->ops = best_alg->ops + 1;
2967   alg_out->cost = best_cost;
2968   memcpy (alg_out->op, best_alg->op,
2969           alg_out->ops * sizeof *alg_out->op);
2970   memcpy (alg_out->log, best_alg->log,
2971           alg_out->ops * sizeof *alg_out->log);
2972 }
2973 \f
2974 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2975    Try three variations:
2976
2977        - a shift/add sequence based on VAL itself
2978        - a shift/add sequence based on -VAL, followed by a negation
2979        - a shift/add sequence based on VAL - 1, followed by an addition.
2980
2981    Return true if the cheapest of these cost less than MULT_COST,
2982    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2983
2984 static bool
2985 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2986                      struct algorithm *alg, enum mult_variant *variant,
2987                      int mult_cost)
2988 {
2989   struct algorithm alg2;
2990   struct mult_cost limit;
2991   int op_cost;
2992   bool speed = optimize_insn_for_speed_p ();
2993
2994   /* Fail quickly for impossible bounds.  */
2995   if (mult_cost < 0)
2996     return false;
2997
2998   /* Ensure that mult_cost provides a reasonable upper bound.
2999      Any constant multiplication can be performed with less
3000      than 2 * bits additions.  */
3001   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3002   if (mult_cost > op_cost)
3003     mult_cost = op_cost;
3004
3005   *variant = basic_variant;
3006   limit.cost = mult_cost;
3007   limit.latency = mult_cost;
3008   synth_mult (alg, val, &limit, mode);
3009
3010   /* This works only if the inverted value actually fits in an
3011      `unsigned int' */
3012   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3013     {
3014       op_cost = neg_cost (speed, mode);
3015       if (MULT_COST_LESS (&alg->cost, mult_cost))
3016         {
3017           limit.cost = alg->cost.cost - op_cost;
3018           limit.latency = alg->cost.latency - op_cost;
3019         }
3020       else
3021         {
3022           limit.cost = mult_cost - op_cost;
3023           limit.latency = mult_cost - op_cost;
3024         }
3025
3026       synth_mult (&alg2, -val, &limit, mode);
3027       alg2.cost.cost += op_cost;
3028       alg2.cost.latency += op_cost;
3029       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3030         *alg = alg2, *variant = negate_variant;
3031     }
3032
3033   /* This proves very useful for division-by-constant.  */
3034   op_cost = add_cost (speed, mode);
3035   if (MULT_COST_LESS (&alg->cost, mult_cost))
3036     {
3037       limit.cost = alg->cost.cost - op_cost;
3038       limit.latency = alg->cost.latency - op_cost;
3039     }
3040   else
3041     {
3042       limit.cost = mult_cost - op_cost;
3043       limit.latency = mult_cost - op_cost;
3044     }
3045
3046   synth_mult (&alg2, val - 1, &limit, mode);
3047   alg2.cost.cost += op_cost;
3048   alg2.cost.latency += op_cost;
3049   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3050     *alg = alg2, *variant = add_variant;
3051
3052   return MULT_COST_LESS (&alg->cost, mult_cost);
3053 }
3054
3055 /* A subroutine of expand_mult, used for constant multiplications.
3056    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3057    convenient.  Use the shift/add sequence described by ALG and apply
3058    the final fixup specified by VARIANT.  */
3059
3060 static rtx
3061 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3062                    rtx target, const struct algorithm *alg,
3063                    enum mult_variant variant)
3064 {
3065   HOST_WIDE_INT val_so_far;
3066   rtx_insn *insn;
3067   rtx accum, tem;
3068   int opno;
3069   machine_mode nmode;
3070
3071   /* Avoid referencing memory over and over and invalid sharing
3072      on SUBREGs.  */
3073   op0 = force_reg (mode, op0);
3074
3075   /* ACCUM starts out either as OP0 or as a zero, depending on
3076      the first operation.  */
3077
3078   if (alg->op[0] == alg_zero)
3079     {
3080       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3081       val_so_far = 0;
3082     }
3083   else if (alg->op[0] == alg_m)
3084     {
3085       accum = copy_to_mode_reg (mode, op0);
3086       val_so_far = 1;
3087     }
3088   else
3089     gcc_unreachable ();
3090
3091   for (opno = 1; opno < alg->ops; opno++)
3092     {
3093       int log = alg->log[opno];
3094       rtx shift_subtarget = optimize ? 0 : accum;
3095       rtx add_target
3096         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3097            && !optimize)
3098           ? target : 0;
3099       rtx accum_target = optimize ? 0 : accum;
3100       rtx accum_inner;
3101
3102       switch (alg->op[opno])
3103         {
3104         case alg_shift:
3105           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3106           /* REG_EQUAL note will be attached to the following insn.  */
3107           emit_move_insn (accum, tem);
3108           val_so_far <<= log;
3109           break;
3110
3111         case alg_add_t_m2:
3112           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3113           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3114                                  add_target ? add_target : accum_target);
3115           val_so_far += (HOST_WIDE_INT) 1 << log;
3116           break;
3117
3118         case alg_sub_t_m2:
3119           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3120           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3121                                  add_target ? add_target : accum_target);
3122           val_so_far -= (HOST_WIDE_INT) 1 << log;
3123           break;
3124
3125         case alg_add_t2_m:
3126           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3127                                 log, shift_subtarget, 0);
3128           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3129                                  add_target ? add_target : accum_target);
3130           val_so_far = (val_so_far << log) + 1;
3131           break;
3132
3133         case alg_sub_t2_m:
3134           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3135                                 log, shift_subtarget, 0);
3136           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3137                                  add_target ? add_target : accum_target);
3138           val_so_far = (val_so_far << log) - 1;
3139           break;
3140
3141         case alg_add_factor:
3142           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3143           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3144                                  add_target ? add_target : accum_target);
3145           val_so_far += val_so_far << log;
3146           break;
3147
3148         case alg_sub_factor:
3149           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3150           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3151                                  (add_target
3152                                   ? add_target : (optimize ? 0 : tem)));
3153           val_so_far = (val_so_far << log) - val_so_far;
3154           break;
3155
3156         default:
3157           gcc_unreachable ();
3158         }
3159
3160       if (SCALAR_INT_MODE_P (mode))
3161         {
3162           /* Write a REG_EQUAL note on the last insn so that we can cse
3163              multiplication sequences.  Note that if ACCUM is a SUBREG,
3164              we've set the inner register and must properly indicate that.  */
3165           tem = op0, nmode = mode;
3166           accum_inner = accum;
3167           if (GET_CODE (accum) == SUBREG)
3168             {
3169               accum_inner = SUBREG_REG (accum);
3170               nmode = GET_MODE (accum_inner);
3171               tem = gen_lowpart (nmode, op0);
3172             }
3173
3174           insn = get_last_insn ();
3175           set_dst_reg_note (insn, REG_EQUAL,
3176                             gen_rtx_MULT (nmode, tem,
3177                                           gen_int_mode (val_so_far, nmode)),
3178                             accum_inner);
3179         }
3180     }
3181
3182   if (variant == negate_variant)
3183     {
3184       val_so_far = -val_so_far;
3185       accum = expand_unop (mode, neg_optab, accum, target, 0);
3186     }
3187   else if (variant == add_variant)
3188     {
3189       val_so_far = val_so_far + 1;
3190       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3191     }
3192
3193   /* Compare only the bits of val and val_so_far that are significant
3194      in the result mode, to avoid sign-/zero-extension confusion.  */
3195   nmode = GET_MODE_INNER (mode);
3196   val &= GET_MODE_MASK (nmode);
3197   val_so_far &= GET_MODE_MASK (nmode);
3198   gcc_assert (val == val_so_far);
3199
3200   return accum;
3201 }
3202
3203 /* Perform a multiplication and return an rtx for the result.
3204    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3205    TARGET is a suggestion for where to store the result (an rtx).
3206
3207    We check specially for a constant integer as OP1.
3208    If you want this check for OP0 as well, then before calling
3209    you should swap the two operands if OP0 would be constant.  */
3210
3211 rtx
3212 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3213              int unsignedp)
3214 {
3215   enum mult_variant variant;
3216   struct algorithm algorithm;
3217   rtx scalar_op1;
3218   int max_cost;
3219   bool speed = optimize_insn_for_speed_p ();
3220   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3221
3222   if (CONSTANT_P (op0))
3223     std::swap (op0, op1);
3224
3225   /* For vectors, there are several simplifications that can be made if
3226      all elements of the vector constant are identical.  */
3227   scalar_op1 = unwrap_const_vec_duplicate (op1);
3228
3229   if (INTEGRAL_MODE_P (mode))
3230     {
3231       rtx fake_reg;
3232       HOST_WIDE_INT coeff;
3233       bool is_neg;
3234       int mode_bitsize;
3235
3236       if (op1 == CONST0_RTX (mode))
3237         return op1;
3238       if (op1 == CONST1_RTX (mode))
3239         return op0;
3240       if (op1 == CONSTM1_RTX (mode))
3241         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3242                             op0, target, 0);
3243
3244       if (do_trapv)
3245         goto skip_synth;
3246
3247       /* If mode is integer vector mode, check if the backend supports
3248          vector lshift (by scalar or vector) at all.  If not, we can't use
3249          synthetized multiply.  */
3250       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3251           && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3252           && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3253         goto skip_synth;
3254
3255       /* These are the operations that are potentially turned into
3256          a sequence of shifts and additions.  */
3257       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3258
3259       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3260          less than or equal in size to `unsigned int' this doesn't matter.
3261          If the mode is larger than `unsigned int', then synth_mult works
3262          only if the constant value exactly fits in an `unsigned int' without
3263          any truncation.  This means that multiplying by negative values does
3264          not work; results are off by 2^32 on a 32 bit machine.  */
3265       if (CONST_INT_P (scalar_op1))
3266         {
3267           coeff = INTVAL (scalar_op1);
3268           is_neg = coeff < 0;
3269         }
3270 #if TARGET_SUPPORTS_WIDE_INT
3271       else if (CONST_WIDE_INT_P (scalar_op1))
3272 #else
3273       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3274 #endif
3275         {
3276           int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3277           /* Perfect power of 2 (other than 1, which is handled above).  */
3278           if (shift > 0)
3279             return expand_shift (LSHIFT_EXPR, mode, op0,
3280                                  shift, target, unsignedp);
3281           else
3282             goto skip_synth;
3283         }
3284       else
3285         goto skip_synth;
3286
3287       /* We used to test optimize here, on the grounds that it's better to
3288          produce a smaller program when -O is not used.  But this causes
3289          such a terrible slowdown sometimes that it seems better to always
3290          use synth_mult.  */
3291
3292       /* Special case powers of two.  */
3293       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3294           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3295         return expand_shift (LSHIFT_EXPR, mode, op0,
3296                              floor_log2 (coeff), target, unsignedp);
3297
3298       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3299
3300       /* Attempt to handle multiplication of DImode values by negative
3301          coefficients, by performing the multiplication by a positive
3302          multiplier and then inverting the result.  */
3303       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3304         {
3305           /* Its safe to use -coeff even for INT_MIN, as the
3306              result is interpreted as an unsigned coefficient.
3307              Exclude cost of op0 from max_cost to match the cost
3308              calculation of the synth_mult.  */
3309           coeff = -(unsigned HOST_WIDE_INT) coeff;
3310           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3311                                     mode, speed)
3312                       - neg_cost (speed, mode));
3313           if (max_cost <= 0)
3314             goto skip_synth;
3315
3316           /* Special case powers of two.  */
3317           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3318             {
3319               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3320                                        floor_log2 (coeff), target, unsignedp);
3321               return expand_unop (mode, neg_optab, temp, target, 0);
3322             }
3323
3324           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3325                                    max_cost))
3326             {
3327               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3328                                             &algorithm, variant);
3329               return expand_unop (mode, neg_optab, temp, target, 0);
3330             }
3331           goto skip_synth;
3332         }
3333
3334       /* Exclude cost of op0 from max_cost to match the cost
3335          calculation of the synth_mult.  */
3336       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3337       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3338         return expand_mult_const (mode, op0, coeff, target,
3339                                   &algorithm, variant);
3340     }
3341  skip_synth:
3342
3343   /* Expand x*2.0 as x+x.  */
3344   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3345       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3346     {
3347       op0 = force_reg (GET_MODE (op0), op0);
3348       return expand_binop (mode, add_optab, op0, op0,
3349                            target, unsignedp, OPTAB_LIB_WIDEN);
3350     }
3351
3352   /* This used to use umul_optab if unsigned, but for non-widening multiply
3353      there is no difference between signed and unsigned.  */
3354   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3355                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3356   gcc_assert (op0);
3357   return op0;
3358 }
3359
3360 /* Return a cost estimate for multiplying a register by the given
3361    COEFFicient in the given MODE and SPEED.  */
3362
3363 int
3364 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3365 {
3366   int max_cost;
3367   struct algorithm algorithm;
3368   enum mult_variant variant;
3369
3370   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3371   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3372                            mode, speed);
3373   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3374     return algorithm.cost.cost;
3375   else
3376     return max_cost;
3377 }
3378
3379 /* Perform a widening multiplication and return an rtx for the result.
3380    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3381    TARGET is a suggestion for where to store the result (an rtx).
3382    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3383    or smul_widen_optab.
3384
3385    We check specially for a constant integer as OP1, comparing the
3386    cost of a widening multiply against the cost of a sequence of shifts
3387    and adds.  */
3388
3389 rtx
3390 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3391                       int unsignedp, optab this_optab)
3392 {
3393   bool speed = optimize_insn_for_speed_p ();
3394   rtx cop1;
3395
3396   if (CONST_INT_P (op1)
3397       && GET_MODE (op0) != VOIDmode
3398       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3399                                 this_optab == umul_widen_optab))
3400       && CONST_INT_P (cop1)
3401       && (INTVAL (cop1) >= 0
3402           || HWI_COMPUTABLE_MODE_P (mode)))
3403     {
3404       HOST_WIDE_INT coeff = INTVAL (cop1);
3405       int max_cost;
3406       enum mult_variant variant;
3407       struct algorithm algorithm;
3408
3409       if (coeff == 0)
3410         return CONST0_RTX (mode);
3411
3412       /* Special case powers of two.  */
3413       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3414         {
3415           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3416           return expand_shift (LSHIFT_EXPR, mode, op0,
3417                                floor_log2 (coeff), target, unsignedp);
3418         }
3419
3420       /* Exclude cost of op0 from max_cost to match the cost
3421          calculation of the synth_mult.  */
3422       max_cost = mul_widen_cost (speed, mode);
3423       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3424                                max_cost))
3425         {
3426           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3427           return expand_mult_const (mode, op0, coeff, target,
3428                                     &algorithm, variant);
3429         }
3430     }
3431   return expand_binop (mode, this_optab, op0, op1, target,
3432                        unsignedp, OPTAB_LIB_WIDEN);
3433 }
3434 \f
3435 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3436    replace division by D, and put the least significant N bits of the result
3437    in *MULTIPLIER_PTR and return the most significant bit.
3438
3439    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3440    needed precision is in PRECISION (should be <= N).
3441
3442    PRECISION should be as small as possible so this function can choose
3443    multiplier more freely.
3444
3445    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3446    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3447
3448    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3449    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3450
3451 unsigned HOST_WIDE_INT
3452 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3453                    unsigned HOST_WIDE_INT *multiplier_ptr,
3454                    int *post_shift_ptr, int *lgup_ptr)
3455 {
3456   int lgup, post_shift;
3457   int pow, pow2;
3458
3459   /* lgup = ceil(log2(divisor)); */
3460   lgup = ceil_log2 (d);
3461
3462   gcc_assert (lgup <= n);
3463
3464   pow = n + lgup;
3465   pow2 = n + lgup - precision;
3466
3467   /* mlow = 2^(N + lgup)/d */
3468   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3469   wide_int mlow = wi::udiv_trunc (val, d);
3470
3471   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3472   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3473   wide_int mhigh = wi::udiv_trunc (val, d);
3474
3475   /* If precision == N, then mlow, mhigh exceed 2^N
3476      (but they do not exceed 2^(N+1)).  */
3477
3478   /* Reduce to lowest terms.  */
3479   for (post_shift = lgup; post_shift > 0; post_shift--)
3480     {
3481       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3482                                                        HOST_BITS_PER_WIDE_INT);
3483       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3484                                                        HOST_BITS_PER_WIDE_INT);
3485       if (ml_lo >= mh_lo)
3486         break;
3487
3488       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3489       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3490     }
3491
3492   *post_shift_ptr = post_shift;
3493   *lgup_ptr = lgup;
3494   if (n < HOST_BITS_PER_WIDE_INT)
3495     {
3496       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3497       *multiplier_ptr = mhigh.to_uhwi () & mask;
3498       return mhigh.to_uhwi () >= mask;
3499     }
3500   else
3501     {
3502       *multiplier_ptr = mhigh.to_uhwi ();
3503       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3504     }
3505 }
3506
3507 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3508    congruent to 1 (mod 2**N).  */
3509
3510 static unsigned HOST_WIDE_INT
3511 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3512 {
3513   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3514
3515   /* The algorithm notes that the choice y = x satisfies
3516      x*y == 1 mod 2^3, since x is assumed odd.
3517      Each iteration doubles the number of bits of significance in y.  */
3518
3519   unsigned HOST_WIDE_INT mask;
3520   unsigned HOST_WIDE_INT y = x;
3521   int nbit = 3;
3522
3523   mask = (n == HOST_BITS_PER_WIDE_INT
3524           ? ~(unsigned HOST_WIDE_INT) 0
3525           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3526
3527   while (nbit < n)
3528     {
3529       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3530       nbit *= 2;
3531     }
3532   return y;
3533 }
3534
3535 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3536    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3537    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3538    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3539    become signed.
3540
3541    The result is put in TARGET if that is convenient.
3542
3543    MODE is the mode of operation.  */
3544
3545 rtx
3546 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3547                              rtx op1, rtx target, int unsignedp)
3548 {
3549   rtx tem;
3550   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3551
3552   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3553                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3554   tem = expand_and (mode, tem, op1, NULL_RTX);
3555   adj_operand
3556     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3557                      adj_operand);
3558
3559   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3560                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3561   tem = expand_and (mode, tem, op0, NULL_RTX);
3562   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3563                           target);
3564
3565   return target;
3566 }
3567
3568 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3569
3570 static rtx
3571 extract_high_half (machine_mode mode, rtx op)
3572 {
3573   machine_mode wider_mode;
3574
3575   if (mode == word_mode)
3576     return gen_highpart (mode, op);
3577
3578   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3579
3580   wider_mode = GET_MODE_WIDER_MODE (mode);
3581   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3582                      GET_MODE_BITSIZE (mode), 0, 1);
3583   return convert_modes (mode, wider_mode, op, 0);
3584 }
3585
3586 /* Like expmed_mult_highpart, but only consider using a multiplication
3587    optab.  OP1 is an rtx for the constant operand.  */
3588
3589 static rtx
3590 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3591                             rtx target, int unsignedp, int max_cost)
3592 {
3593   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3594   machine_mode wider_mode;
3595   optab moptab;
3596   rtx tem;
3597   int size;
3598   bool speed = optimize_insn_for_speed_p ();
3599
3600   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3601
3602   wider_mode = GET_MODE_WIDER_MODE (mode);
3603   size = GET_MODE_BITSIZE (mode);
3604
3605   /* Firstly, try using a multiplication insn that only generates the needed
3606      high part of the product, and in the sign flavor of unsignedp.  */
3607   if (mul_highpart_cost (speed, mode) < max_cost)
3608     {
3609       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3610       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3611                           unsignedp, OPTAB_DIRECT);
3612       if (tem)
3613         return tem;
3614     }
3615
3616   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3617      Need to adjust the result after the multiplication.  */
3618   if (size - 1 < BITS_PER_WORD
3619       && (mul_highpart_cost (speed, mode)
3620           + 2 * shift_cost (speed, mode, size-1)
3621           + 4 * add_cost (speed, mode) < max_cost))
3622     {
3623       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3624       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3625                           unsignedp, OPTAB_DIRECT);
3626       if (tem)
3627         /* We used the wrong signedness.  Adjust the result.  */
3628         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3629                                             tem, unsignedp);
3630     }
3631
3632   /* Try widening multiplication.  */
3633   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3634   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3635       && mul_widen_cost (speed, wider_mode) < max_cost)
3636     {
3637       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3638                           unsignedp, OPTAB_WIDEN);
3639       if (tem)
3640         return extract_high_half (mode, tem);
3641     }
3642
3643   /* Try widening the mode and perform a non-widening multiplication.  */
3644   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3645       && size - 1 < BITS_PER_WORD
3646       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3647           < max_cost))
3648     {
3649       rtx_insn *insns;
3650       rtx wop0, wop1;
3651
3652       /* We need to widen the operands, for example to ensure the
3653          constant multiplier is correctly sign or zero extended.
3654          Use a sequence to clean-up any instructions emitted by
3655          the conversions if things don't work out.  */
3656       start_sequence ();
3657       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3658       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3659       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3660                           unsignedp, OPTAB_WIDEN);
3661       insns = get_insns ();
3662       end_sequence ();
3663
3664       if (tem)
3665         {
3666           emit_insn (insns);
3667           return extract_high_half (mode, tem);
3668         }
3669     }
3670
3671   /* Try widening multiplication of opposite signedness, and adjust.  */
3672   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3673   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3674       && size - 1 < BITS_PER_WORD
3675       && (mul_widen_cost (speed, wider_mode)
3676           + 2 * shift_cost (speed, mode, size-1)
3677           + 4 * add_cost (speed, mode) < max_cost))
3678     {
3679       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3680                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3681       if (tem != 0)
3682         {
3683           tem = extract_high_half (mode, tem);
3684           /* We used the wrong signedness.  Adjust the result.  */
3685           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3686                                               target, unsignedp);
3687         }
3688     }
3689
3690   return 0;
3691 }
3692
3693 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3694    putting the high half of the result in TARGET if that is convenient,
3695    and return where the result is.  If the operation can not be performed,
3696    0 is returned.
3697
3698    MODE is the mode of operation and result.
3699
3700    UNSIGNEDP nonzero means unsigned multiply.
3701
3702    MAX_COST is the total allowed cost for the expanded RTL.  */
3703
3704 static rtx
3705 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3706                       rtx target, int unsignedp, int max_cost)
3707 {
3708   machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3709   unsigned HOST_WIDE_INT cnst1;
3710   int extra_cost;
3711   bool sign_adjust = false;
3712   enum mult_variant variant;
3713   struct algorithm alg;
3714   rtx tem;
3715   bool speed = optimize_insn_for_speed_p ();
3716
3717   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3718   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3719   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3720
3721   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3722
3723   /* We can't optimize modes wider than BITS_PER_WORD.
3724      ??? We might be able to perform double-word arithmetic if
3725      mode == word_mode, however all the cost calculations in
3726      synth_mult etc. assume single-word operations.  */
3727   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3728     return expmed_mult_highpart_optab (mode, op0, op1, target,
3729                                        unsignedp, max_cost);
3730
3731   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3732
3733   /* Check whether we try to multiply by a negative constant.  */
3734   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3735     {
3736       sign_adjust = true;
3737       extra_cost += add_cost (speed, mode);
3738     }
3739
3740   /* See whether shift/add multiplication is cheap enough.  */
3741   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3742                            max_cost - extra_cost))
3743     {
3744       /* See whether the specialized multiplication optabs are
3745          cheaper than the shift/add version.  */
3746       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3747                                         alg.cost.cost + extra_cost);
3748       if (tem)
3749         return tem;
3750
3751       tem = convert_to_mode (wider_mode, op0, unsignedp);
3752       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3753       tem = extract_high_half (mode, tem);
3754
3755       /* Adjust result for signedness.  */
3756       if (sign_adjust)
3757         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3758
3759       return tem;
3760     }
3761   return expmed_mult_highpart_optab (mode, op0, op1, target,
3762                                      unsignedp, max_cost);
3763 }
3764
3765
3766 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3767
3768 static rtx
3769 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3770 {
3771   rtx result, temp, shift;
3772   rtx_code_label *label;
3773   int logd;
3774   int prec = GET_MODE_PRECISION (mode);
3775
3776   logd = floor_log2 (d);
3777   result = gen_reg_rtx (mode);
3778
3779   /* Avoid conditional branches when they're expensive.  */
3780   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3781       && optimize_insn_for_speed_p ())
3782     {
3783       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3784                                       mode, 0, -1);
3785       if (signmask)
3786         {
3787           HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3788           signmask = force_reg (mode, signmask);
3789           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3790
3791           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3792              which instruction sequence to use.  If logical right shifts
3793              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3794              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3795
3796           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3797           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3798               || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3799                   > COSTS_N_INSNS (2)))
3800             {
3801               temp = expand_binop (mode, xor_optab, op0, signmask,
3802                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3803               temp = expand_binop (mode, sub_optab, temp, signmask,
3804                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3805               temp = expand_binop (mode, and_optab, temp,
3806                                    gen_int_mode (masklow, mode),
3807                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3808               temp = expand_binop (mode, xor_optab, temp, signmask,
3809                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3810               temp = expand_binop (mode, sub_optab, temp, signmask,
3811                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3812             }
3813           else
3814             {
3815               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3816                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3817               signmask = force_reg (mode, signmask);
3818
3819               temp = expand_binop (mode, add_optab, op0, signmask,
3820                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3821               temp = expand_binop (mode, and_optab, temp,
3822                                    gen_int_mode (masklow, mode),
3823                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3824               temp = expand_binop (mode, sub_optab, temp, signmask,
3825                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3826             }
3827           return temp;
3828         }
3829     }
3830
3831   /* Mask contains the mode's signbit and the significant bits of the
3832      modulus.  By including the signbit in the operation, many targets
3833      can avoid an explicit compare operation in the following comparison
3834      against zero.  */
3835   wide_int mask = wi::mask (logd, false, prec);
3836   mask = wi::set_bit (mask, prec - 1);
3837
3838   temp = expand_binop (mode, and_optab, op0,
3839                        immed_wide_int_const (mask, mode),
3840                        result, 1, OPTAB_LIB_WIDEN);
3841   if (temp != result)
3842     emit_move_insn (result, temp);
3843
3844   label = gen_label_rtx ();
3845   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3846
3847   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3848                        0, OPTAB_LIB_WIDEN);
3849
3850   mask = wi::mask (logd, true, prec);
3851   temp = expand_binop (mode, ior_optab, temp,
3852                        immed_wide_int_const (mask, mode),
3853                        result, 1, OPTAB_LIB_WIDEN);
3854   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3855                        0, OPTAB_LIB_WIDEN);
3856   if (temp != result)
3857     emit_move_insn (result, temp);
3858   emit_label (label);
3859   return result;
3860 }
3861
3862 /* Expand signed division of OP0 by a power of two D in mode MODE.
3863    This routine is only called for positive values of D.  */
3864
3865 static rtx
3866 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3867 {
3868   rtx temp;
3869   rtx_code_label *label;
3870   int logd;
3871
3872   logd = floor_log2 (d);
3873
3874   if (d == 2
3875       && BRANCH_COST (optimize_insn_for_speed_p (),
3876                       false) >= 1)
3877     {
3878       temp = gen_reg_rtx (mode);
3879       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3880       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3881                            0, OPTAB_LIB_WIDEN);
3882       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3883     }
3884
3885   if (HAVE_conditional_move
3886       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3887     {
3888       rtx temp2;
3889
3890       start_sequence ();
3891       temp2 = copy_to_mode_reg (mode, op0);
3892       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3893                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3894       temp = force_reg (mode, temp);
3895
3896       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3897       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3898                                      mode, temp, temp2, mode, 0);
3899       if (temp2)
3900         {
3901           rtx_insn *seq = get_insns ();
3902           end_sequence ();
3903           emit_insn (seq);
3904           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3905         }
3906       end_sequence ();
3907     }
3908
3909   if (BRANCH_COST (optimize_insn_for_speed_p (),
3910                    false) >= 2)
3911     {
3912       int ushift = GET_MODE_BITSIZE (mode) - logd;
3913
3914       temp = gen_reg_rtx (mode);
3915       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3916       if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3917           || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3918              > COSTS_N_INSNS (1))
3919         temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3920                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3921       else
3922         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3923                              ushift, NULL_RTX, 1);
3924       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3925                            0, OPTAB_LIB_WIDEN);
3926       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3927     }
3928
3929   label = gen_label_rtx ();
3930   temp = copy_to_mode_reg (mode, op0);
3931   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3932   expand_inc (temp, gen_int_mode (d - 1, mode));
3933   emit_label (label);
3934   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3935 }
3936 \f
3937 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3938    if that is convenient, and returning where the result is.
3939    You may request either the quotient or the remainder as the result;
3940    specify REM_FLAG nonzero to get the remainder.
3941
3942    CODE is the expression code for which kind of division this is;
3943    it controls how rounding is done.  MODE is the machine mode to use.
3944    UNSIGNEDP nonzero means do unsigned division.  */
3945
3946 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3947    and then correct it by or'ing in missing high bits
3948    if result of ANDI is nonzero.
3949    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3950    This could optimize to a bfexts instruction.
3951    But C doesn't use these operations, so their optimizations are
3952    left for later.  */
3953 /* ??? For modulo, we don't actually need the highpart of the first product,
3954    the low part will do nicely.  And for small divisors, the second multiply
3955    can also be a low-part only multiply or even be completely left out.
3956    E.g. to calculate the remainder of a division by 3 with a 32 bit
3957    multiply, multiply with 0x55555556 and extract the upper two bits;
3958    the result is exact for inputs up to 0x1fffffff.
3959    The input range can be reduced by using cross-sum rules.
3960    For odd divisors >= 3, the following table gives right shift counts
3961    so that if a number is shifted by an integer multiple of the given
3962    amount, the remainder stays the same:
3963    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3964    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3965    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3966    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3967    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3968
3969    Cross-sum rules for even numbers can be derived by leaving as many bits
3970    to the right alone as the divisor has zeros to the right.
3971    E.g. if x is an unsigned 32 bit number:
3972    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3973    */
3974
3975 rtx
3976 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3977                rtx op0, rtx op1, rtx target, int unsignedp)
3978 {
3979   machine_mode compute_mode;
3980   rtx tquotient;
3981   rtx quotient = 0, remainder = 0;
3982   rtx_insn *last;
3983   int size;
3984   rtx_insn *insn;
3985   optab optab1, optab2;
3986   int op1_is_constant, op1_is_pow2 = 0;
3987   int max_cost, extra_cost;
3988   static HOST_WIDE_INT last_div_const = 0;
3989   bool speed = optimize_insn_for_speed_p ();
3990
3991   op1_is_constant = CONST_INT_P (op1);
3992   if (op1_is_constant)
3993     {
3994       unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3995       if (unsignedp)
3996         ext_op1 &= GET_MODE_MASK (mode);
3997       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3998                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3999     }
4000
4001   /*
4002      This is the structure of expand_divmod:
4003
4004      First comes code to fix up the operands so we can perform the operations
4005      correctly and efficiently.
4006
4007      Second comes a switch statement with code specific for each rounding mode.
4008      For some special operands this code emits all RTL for the desired
4009      operation, for other cases, it generates only a quotient and stores it in
4010      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4011      to indicate that it has not done anything.
4012
4013      Last comes code that finishes the operation.  If QUOTIENT is set and
4014      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4015      QUOTIENT is not set, it is computed using trunc rounding.
4016
4017      We try to generate special code for division and remainder when OP1 is a
4018      constant.  If |OP1| = 2**n we can use shifts and some other fast
4019      operations.  For other values of OP1, we compute a carefully selected
4020      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4021      by m.
4022
4023      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4024      half of the product.  Different strategies for generating the product are
4025      implemented in expmed_mult_highpart.
4026
4027      If what we actually want is the remainder, we generate that by another
4028      by-constant multiplication and a subtraction.  */
4029
4030   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4031      code below will malfunction if we are, so check here and handle
4032      the special case if so.  */
4033   if (op1 == const1_rtx)
4034     return rem_flag ? const0_rtx : op0;
4035
4036     /* When dividing by -1, we could get an overflow.
4037      negv_optab can handle overflows.  */
4038   if (! unsignedp && op1 == constm1_rtx)
4039     {
4040       if (rem_flag)
4041         return const0_rtx;
4042       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4043                           ? negv_optab : neg_optab, op0, target, 0);
4044     }
4045
4046   if (target
4047       /* Don't use the function value register as a target
4048          since we have to read it as well as write it,
4049          and function-inlining gets confused by this.  */
4050       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4051           /* Don't clobber an operand while doing a multi-step calculation.  */
4052           || ((rem_flag || op1_is_constant)
4053               && (reg_mentioned_p (target, op0)
4054                   || (MEM_P (op0) && MEM_P (target))))
4055           || reg_mentioned_p (target, op1)
4056           || (MEM_P (op1) && MEM_P (target))))
4057     target = 0;
4058
4059   /* Get the mode in which to perform this computation.  Normally it will
4060      be MODE, but sometimes we can't do the desired operation in MODE.
4061      If so, pick a wider mode in which we can do the operation.  Convert
4062      to that mode at the start to avoid repeated conversions.
4063
4064      First see what operations we need.  These depend on the expression
4065      we are evaluating.  (We assume that divxx3 insns exist under the
4066      same conditions that modxx3 insns and that these insns don't normally
4067      fail.  If these assumptions are not correct, we may generate less
4068      efficient code in some cases.)
4069
4070      Then see if we find a mode in which we can open-code that operation
4071      (either a division, modulus, or shift).  Finally, check for the smallest
4072      mode for which we can do the operation with a library call.  */
4073
4074   /* We might want to refine this now that we have division-by-constant
4075      optimization.  Since expmed_mult_highpart tries so many variants, it is
4076      not straightforward to generalize this.  Maybe we should make an array
4077      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4078
4079   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
4080             ? (unsignedp ? lshr_optab : ashr_optab)
4081             : (unsignedp ? udiv_optab : sdiv_optab));
4082   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
4083             ? optab1
4084             : (unsignedp ? udivmod_optab : sdivmod_optab));
4085
4086   for (compute_mode = mode; compute_mode != VOIDmode;
4087        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4088     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4089         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4090       break;
4091
4092   if (compute_mode == VOIDmode)
4093     for (compute_mode = mode; compute_mode != VOIDmode;
4094          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4095       if (optab_libfunc (optab1, compute_mode)
4096           || optab_libfunc (optab2, compute_mode))
4097         break;
4098
4099   /* If we still couldn't find a mode, use MODE, but expand_binop will
4100      probably die.  */
4101   if (compute_mode == VOIDmode)
4102     compute_mode = mode;
4103
4104   if (target && GET_MODE (target) == compute_mode)
4105     tquotient = target;
4106   else
4107     tquotient = gen_reg_rtx (compute_mode);
4108
4109   size = GET_MODE_BITSIZE (compute_mode);
4110 #if 0
4111   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4112      (mode), and thereby get better code when OP1 is a constant.  Do that
4113      later.  It will require going over all usages of SIZE below.  */
4114   size = GET_MODE_BITSIZE (mode);
4115 #endif
4116
4117   /* Only deduct something for a REM if the last divide done was
4118      for a different constant.   Then set the constant of the last
4119      divide.  */
4120   max_cost = (unsignedp
4121               ? udiv_cost (speed, compute_mode)
4122               : sdiv_cost (speed, compute_mode));
4123   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4124                      && INTVAL (op1) == last_div_const))
4125     max_cost -= (mul_cost (speed, compute_mode)
4126                  + add_cost (speed, compute_mode));
4127
4128   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4129
4130   /* Now convert to the best mode to use.  */
4131   if (compute_mode != mode)
4132     {
4133       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4134       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4135
4136       /* convert_modes may have placed op1 into a register, so we
4137          must recompute the following.  */
4138       op1_is_constant = CONST_INT_P (op1);
4139       op1_is_pow2 = (op1_is_constant
4140                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4141                           || (! unsignedp
4142                               && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4143     }
4144
4145   /* If one of the operands is a volatile MEM, copy it into a register.  */
4146
4147   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4148     op0 = force_reg (compute_mode, op0);
4149   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4150     op1 = force_reg (compute_mode, op1);
4151
4152   /* If we need the remainder or if OP1 is constant, we need to
4153      put OP0 in a register in case it has any queued subexpressions.  */
4154   if (rem_flag || op1_is_constant)
4155     op0 = force_reg (compute_mode, op0);
4156
4157   last = get_last_insn ();
4158
4159   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4160   if (unsignedp)
4161     {
4162       if (code == FLOOR_DIV_EXPR)
4163         code = TRUNC_DIV_EXPR;
4164       if (code == FLOOR_MOD_EXPR)
4165         code = TRUNC_MOD_EXPR;
4166       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4167         code = TRUNC_DIV_EXPR;
4168     }
4169
4170   if (op1 != const0_rtx)
4171     switch (code)
4172       {
4173       case TRUNC_MOD_EXPR:
4174       case TRUNC_DIV_EXPR:
4175         if (op1_is_constant)
4176           {
4177             if (unsignedp)
4178               {
4179                 unsigned HOST_WIDE_INT mh, ml;
4180                 int pre_shift, post_shift;
4181                 int dummy;
4182                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4183                                             & GET_MODE_MASK (compute_mode));
4184
4185                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4186                   {
4187                     pre_shift = floor_log2 (d);
4188                     if (rem_flag)
4189                       {
4190                         unsigned HOST_WIDE_INT mask
4191                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4192                         remainder
4193                           = expand_binop (compute_mode, and_optab, op0,
4194                                           gen_int_mode (mask, compute_mode),
4195                                           remainder, 1,
4196                                           OPTAB_LIB_WIDEN);
4197                         if (remainder)
4198                           return gen_lowpart (mode, remainder);
4199                       }
4200                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4201                                              pre_shift, tquotient, 1);
4202                   }
4203                 else if (size <= HOST_BITS_PER_WIDE_INT)
4204                   {
4205                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4206                       {
4207                         /* Most significant bit of divisor is set; emit an scc
4208                            insn.  */
4209                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4210                                                           compute_mode, 1, 1);
4211                       }
4212                     else
4213                       {
4214                         /* Find a suitable multiplier and right shift count
4215                            instead of multiplying with D.  */
4216
4217                         mh = choose_multiplier (d, size, size,
4218                                                 &ml, &post_shift, &dummy);
4219
4220                         /* If the suggested multiplier is more than SIZE bits,
4221                            we can do better for even divisors, using an
4222                            initial right shift.  */
4223                         if (mh != 0 && (d & 1) == 0)
4224                           {
4225                             pre_shift = floor_log2 (d & -d);
4226                             mh = choose_multiplier (d >> pre_shift, size,
4227                                                     size - pre_shift,
4228                                                     &ml, &post_shift, &dummy);
4229                             gcc_assert (!mh);
4230                           }
4231                         else
4232                           pre_shift = 0;
4233
4234                         if (mh != 0)
4235                           {
4236                             rtx t1, t2, t3, t4;
4237
4238                             if (post_shift - 1 >= BITS_PER_WORD)
4239                               goto fail1;
4240
4241                             extra_cost
4242                               = (shift_cost (speed, compute_mode, post_shift - 1)
4243                                  + shift_cost (speed, compute_mode, 1)
4244                                  + 2 * add_cost (speed, compute_mode));
4245                             t1 = expmed_mult_highpart
4246                               (compute_mode, op0,
4247                                gen_int_mode (ml, compute_mode),
4248                                NULL_RTX, 1, max_cost - extra_cost);
4249                             if (t1 == 0)
4250                               goto fail1;
4251                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4252                                                                op0, t1),
4253                                                 NULL_RTX);
4254                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4255                                                t2, 1, NULL_RTX, 1);
4256                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4257                                                               t1, t3),
4258                                                 NULL_RTX);
4259                             quotient = expand_shift
4260                               (RSHIFT_EXPR, compute_mode, t4,
4261                                post_shift - 1, tquotient, 1);
4262                           }
4263                         else
4264                           {
4265                             rtx t1, t2;
4266
4267                             if (pre_shift >= BITS_PER_WORD
4268                                 || post_shift >= BITS_PER_WORD)
4269                               goto fail1;
4270
4271                             t1 = expand_shift
4272                               (RSHIFT_EXPR, compute_mode, op0,
4273                                pre_shift, NULL_RTX, 1);
4274                             extra_cost
4275                               = (shift_cost (speed, compute_mode, pre_shift)
4276                                  + shift_cost (speed, compute_mode, post_shift));
4277                             t2 = expmed_mult_highpart
4278                               (compute_mode, t1,
4279                                gen_int_mode (ml, compute_mode),
4280                                NULL_RTX, 1, max_cost - extra_cost);
4281                             if (t2 == 0)
4282                               goto fail1;
4283                             quotient = expand_shift
4284                               (RSHIFT_EXPR, compute_mode, t2,
4285                                post_shift, tquotient, 1);
4286                           }
4287                       }
4288                   }
4289                 else            /* Too wide mode to use tricky code */
4290                   break;
4291
4292                 insn = get_last_insn ();
4293                 if (insn != last)
4294                   set_dst_reg_note (insn, REG_EQUAL,
4295                                     gen_rtx_UDIV (compute_mode, op0, op1),
4296                                     quotient);
4297               }
4298             else                /* TRUNC_DIV, signed */
4299               {
4300                 unsigned HOST_WIDE_INT ml;
4301                 int lgup, post_shift;
4302                 rtx mlr;
4303                 HOST_WIDE_INT d = INTVAL (op1);
4304                 unsigned HOST_WIDE_INT abs_d;
4305
4306                 /* Since d might be INT_MIN, we have to cast to
4307                    unsigned HOST_WIDE_INT before negating to avoid
4308                    undefined signed overflow.  */
4309                 abs_d = (d >= 0
4310                          ? (unsigned HOST_WIDE_INT) d
4311                          : - (unsigned HOST_WIDE_INT) d);
4312
4313                 /* n rem d = n rem -d */
4314                 if (rem_flag && d < 0)
4315                   {
4316                     d = abs_d;
4317                     op1 = gen_int_mode (abs_d, compute_mode);
4318                   }
4319
4320                 if (d == 1)
4321                   quotient = op0;
4322                 else if (d == -1)
4323                   quotient = expand_unop (compute_mode, neg_optab, op0,
4324                                           tquotient, 0);
4325                 else if (HOST_BITS_PER_WIDE_INT >= size
4326                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4327                   {
4328                     /* This case is not handled correctly below.  */
4329                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4330                                                 compute_mode, 1, 1);
4331                     if (quotient == 0)
4332                       goto fail1;
4333                   }
4334                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4335                          && (rem_flag
4336                              ? smod_pow2_cheap (speed, compute_mode)
4337                              : sdiv_pow2_cheap (speed, compute_mode))
4338                          /* We assume that cheap metric is true if the
4339                             optab has an expander for this mode.  */
4340                          && ((optab_handler ((rem_flag ? smod_optab
4341                                               : sdiv_optab),
4342                                              compute_mode)
4343                               != CODE_FOR_nothing)
4344                              || (optab_handler (sdivmod_optab,
4345                                                 compute_mode)
4346                                  != CODE_FOR_nothing)))
4347                   ;
4348                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4349                   {
4350                     if (rem_flag)
4351                       {
4352                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4353                         if (remainder)
4354                           return gen_lowpart (mode, remainder);
4355                       }
4356
4357                     if (sdiv_pow2_cheap (speed, compute_mode)
4358                         && ((optab_handler (sdiv_optab, compute_mode)
4359                              != CODE_FOR_nothing)
4360                             || (optab_handler (sdivmod_optab, compute_mode)
4361                                 != CODE_FOR_nothing)))
4362                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4363                                                 compute_mode, op0,
4364                                                 gen_int_mode (abs_d,
4365                                                               compute_mode),
4366                                                 NULL_RTX, 0);
4367                     else
4368                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4369
4370                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4371                        negate the quotient.  */
4372                     if (d < 0)
4373                       {
4374                         insn = get_last_insn ();
4375                         if (insn != last
4376                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4377                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4378                           set_dst_reg_note (insn, REG_EQUAL,
4379                                             gen_rtx_DIV (compute_mode, op0,
4380                                                          gen_int_mode
4381                                                            (abs_d,
4382                                                             compute_mode)),
4383                                             quotient);
4384
4385                         quotient = expand_unop (compute_mode, neg_optab,
4386                                                 quotient, quotient, 0);
4387                       }
4388                   }
4389                 else if (size <= HOST_BITS_PER_WIDE_INT)
4390                   {
4391                     choose_multiplier (abs_d, size, size - 1,
4392                                        &ml, &post_shift, &lgup);
4393                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4394                       {
4395                         rtx t1, t2, t3;
4396
4397                         if (post_shift >= BITS_PER_WORD
4398                             || size - 1 >= BITS_PER_WORD)
4399                           goto fail1;
4400
4401                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4402                                       + shift_cost (speed, compute_mode, size - 1)
4403                                       + add_cost (speed, compute_mode));
4404                         t1 = expmed_mult_highpart
4405                           (compute_mode, op0, gen_int_mode (ml, compute_mode),
4406                            NULL_RTX, 0, max_cost - extra_cost);
4407                         if (t1 == 0)
4408                           goto fail1;
4409                         t2 = expand_shift
4410                           (RSHIFT_EXPR, compute_mode, t1,
4411                            post_shift, NULL_RTX, 0);
4412                         t3 = expand_shift
4413                           (RSHIFT_EXPR, compute_mode, op0,
4414                            size - 1, NULL_RTX, 0);
4415                         if (d < 0)
4416                           quotient
4417                             = force_operand (gen_rtx_MINUS (compute_mode,
4418                                                             t3, t2),
4419                                              tquotient);
4420                         else
4421                           quotient
4422                             = force_operand (gen_rtx_MINUS (compute_mode,
4423                                                             t2, t3),
4424                                              tquotient);
4425                       }
4426                     else
4427                       {
4428                         rtx t1, t2, t3, t4;
4429
4430                         if (post_shift >= BITS_PER_WORD
4431                             || size - 1 >= BITS_PER_WORD)
4432                           goto fail1;
4433
4434                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4435                         mlr = gen_int_mode (ml, compute_mode);
4436                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4437                                       + shift_cost (speed, compute_mode, size - 1)
4438                                       + 2 * add_cost (speed, compute_mode));
4439                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4440                                                    NULL_RTX, 0,
4441                                                    max_cost - extra_cost);
4442                         if (t1 == 0)
4443                           goto fail1;
4444                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4445                                                           t1, op0),
4446                                             NULL_RTX);
4447                         t3 = expand_shift
4448                           (RSHIFT_EXPR, compute_mode, t2,
4449                            post_shift, NULL_RTX, 0);
4450                         t4 = expand_shift
4451                           (RSHIFT_EXPR, compute_mode, op0,
4452                            size - 1, NULL_RTX, 0);
4453                         if (d < 0)
4454                           quotient
4455                             = force_operand (gen_rtx_MINUS (compute_mode,
4456                                                             t4, t3),
4457                                              tquotient);
4458                         else
4459                           quotient
4460                             = force_operand (gen_rtx_MINUS (compute_mode,
4461                                                             t3, t4),
4462                                              tquotient);
4463                       }
4464                   }
4465                 else            /* Too wide mode to use tricky code */
4466                   break;
4467
4468                 insn = get_last_insn ();
4469                 if (insn != last)
4470                   set_dst_reg_note (insn, REG_EQUAL,
4471                                     gen_rtx_DIV (compute_mode, op0, op1),
4472                                     quotient);
4473               }
4474             break;
4475           }
4476       fail1:
4477         delete_insns_since (last);
4478         break;
4479
4480       case FLOOR_DIV_EXPR:
4481       case FLOOR_MOD_EXPR:
4482       /* We will come here only for signed operations.  */
4483         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4484           {
4485             unsigned HOST_WIDE_INT mh, ml;
4486             int pre_shift, lgup, post_shift;
4487             HOST_WIDE_INT d = INTVAL (op1);
4488
4489             if (d > 0)
4490               {
4491                 /* We could just as easily deal with negative constants here,
4492                    but it does not seem worth the trouble for GCC 2.6.  */
4493                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4494                   {
4495                     pre_shift = floor_log2 (d);
4496                     if (rem_flag)
4497                       {
4498                         unsigned HOST_WIDE_INT mask
4499                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4500                         remainder = expand_binop
4501                           (compute_mode, and_optab, op0,
4502                            gen_int_mode (mask, compute_mode),
4503                            remainder, 0, OPTAB_LIB_WIDEN);
4504                         if (remainder)
4505                           return gen_lowpart (mode, remainder);
4506                       }
4507                     quotient = expand_shift
4508                       (RSHIFT_EXPR, compute_mode, op0,
4509                        pre_shift, tquotient, 0);
4510                   }
4511                 else
4512                   {
4513                     rtx t1, t2, t3, t4;
4514
4515                     mh = choose_multiplier (d, size, size - 1,
4516                                             &ml, &post_shift, &lgup);
4517                     gcc_assert (!mh);
4518
4519                     if (post_shift < BITS_PER_WORD
4520                         && size - 1 < BITS_PER_WORD)
4521                       {
4522                         t1 = expand_shift
4523                           (RSHIFT_EXPR, compute_mode, op0,
4524                            size - 1, NULL_RTX, 0);
4525                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4526                                            NULL_RTX, 0, OPTAB_WIDEN);
4527                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4528                                       + shift_cost (speed, compute_mode, size - 1)
4529                                       + 2 * add_cost (speed, compute_mode));
4530                         t3 = expmed_mult_highpart
4531                           (compute_mode, t2, gen_int_mode (ml, compute_mode),
4532                            NULL_RTX, 1, max_cost - extra_cost);
4533                         if (t3 != 0)
4534                           {
4535                             t4 = expand_shift
4536                               (RSHIFT_EXPR, compute_mode, t3,
4537                                post_shift, NULL_RTX, 1);
4538                             quotient = expand_binop (compute_mode, xor_optab,
4539                                                      t4, t1, tquotient, 0,
4540                                                      OPTAB_WIDEN);
4541                           }
4542                       }
4543                   }
4544               }
4545             else
4546               {
4547                 rtx nsign, t1, t2, t3, t4;
4548                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4549                                                   op0, constm1_rtx), NULL_RTX);
4550                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4551                                    0, OPTAB_WIDEN);
4552                 nsign = expand_shift
4553                   (RSHIFT_EXPR, compute_mode, t2,
4554                    size - 1, NULL_RTX, 0);
4555                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4556                                     NULL_RTX);
4557                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4558                                     NULL_RTX, 0);
4559                 if (t4)
4560                   {
4561                     rtx t5;
4562                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4563                                       NULL_RTX, 0);
4564                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4565                                                             t4, t5),
4566                                               tquotient);
4567                   }
4568               }
4569           }
4570
4571         if (quotient != 0)
4572           break;
4573         delete_insns_since (last);
4574
4575         /* Try using an instruction that produces both the quotient and
4576            remainder, using truncation.  We can easily compensate the quotient
4577            or remainder to get floor rounding, once we have the remainder.
4578            Notice that we compute also the final remainder value here,
4579            and return the result right away.  */
4580         if (target == 0 || GET_MODE (target) != compute_mode)
4581           target = gen_reg_rtx (compute_mode);
4582
4583         if (rem_flag)
4584           {
4585             remainder
4586               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4587             quotient = gen_reg_rtx (compute_mode);
4588           }
4589         else
4590           {
4591             quotient
4592               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4593             remainder = gen_reg_rtx (compute_mode);
4594           }
4595
4596         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4597                                  quotient, remainder, 0))
4598           {
4599             /* This could be computed with a branch-less sequence.
4600                Save that for later.  */
4601             rtx tem;
4602             rtx_code_label *label = gen_label_rtx ();
4603             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4604             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4605                                 NULL_RTX, 0, OPTAB_WIDEN);
4606             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4607             expand_dec (quotient, const1_rtx);
4608             expand_inc (remainder, op1);
4609             emit_label (label);
4610             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4611           }
4612
4613         /* No luck with division elimination or divmod.  Have to do it
4614            by conditionally adjusting op0 *and* the result.  */
4615         {
4616           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4617           rtx adjusted_op0;
4618           rtx tem;
4619
4620           quotient = gen_reg_rtx (compute_mode);
4621           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4622           label1 = gen_label_rtx ();
4623           label2 = gen_label_rtx ();
4624           label3 = gen_label_rtx ();
4625           label4 = gen_label_rtx ();
4626           label5 = gen_label_rtx ();
4627           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4628           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4629           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4630                               quotient, 0, OPTAB_LIB_WIDEN);
4631           if (tem != quotient)
4632             emit_move_insn (quotient, tem);
4633           emit_jump_insn (targetm.gen_jump (label5));
4634           emit_barrier ();
4635           emit_label (label1);
4636           expand_inc (adjusted_op0, const1_rtx);
4637           emit_jump_insn (targetm.gen_jump (label4));
4638           emit_barrier ();
4639           emit_label (label2);
4640           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4641           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4642                               quotient, 0, OPTAB_LIB_WIDEN);
4643           if (tem != quotient)
4644             emit_move_insn (quotient, tem);
4645           emit_jump_insn (targetm.gen_jump (label5));
4646           emit_barrier ();
4647           emit_label (label3);
4648           expand_dec (adjusted_op0, const1_rtx);
4649           emit_label (label4);
4650           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4651                               quotient, 0, OPTAB_LIB_WIDEN);
4652           if (tem != quotient)
4653             emit_move_insn (quotient, tem);
4654           expand_dec (quotient, const1_rtx);
4655           emit_label (label5);
4656         }
4657         break;
4658
4659       case CEIL_DIV_EXPR:
4660       case CEIL_MOD_EXPR:
4661         if (unsignedp)
4662           {
4663             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4664               {
4665                 rtx t1, t2, t3;
4666                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4667                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4668                                    floor_log2 (d), tquotient, 1);
4669                 t2 = expand_binop (compute_mode, and_optab, op0,
4670                                    gen_int_mode (d - 1, compute_mode),
4671                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4672                 t3 = gen_reg_rtx (compute_mode);
4673                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4674                                       compute_mode, 1, 1);
4675                 if (t3 == 0)
4676                   {
4677                     rtx_code_label *lab;
4678                     lab = gen_label_rtx ();
4679                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4680                     expand_inc (t1, const1_rtx);
4681                     emit_label (lab);
4682                     quotient = t1;
4683                   }
4684                 else
4685                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4686                                                           t1, t3),
4687                                             tquotient);
4688                 break;
4689               }
4690
4691             /* Try using an instruction that produces both the quotient and
4692                remainder, using truncation.  We can easily compensate the
4693                quotient or remainder to get ceiling rounding, once we have the
4694                remainder.  Notice that we compute also the final remainder
4695                value here, and return the result right away.  */
4696             if (target == 0 || GET_MODE (target) != compute_mode)
4697               target = gen_reg_rtx (compute_mode);
4698
4699             if (rem_flag)
4700               {
4701                 remainder = (REG_P (target)
4702                              ? target : gen_reg_rtx (compute_mode));
4703                 quotient = gen_reg_rtx (compute_mode);
4704               }
4705             else
4706               {
4707                 quotient = (REG_P (target)
4708                             ? target : gen_reg_rtx (compute_mode));
4709                 remainder = gen_reg_rtx (compute_mode);
4710               }
4711
4712             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4713                                      remainder, 1))
4714               {
4715                 /* This could be computed with a branch-less sequence.
4716                    Save that for later.  */
4717                 rtx_code_label *label = gen_label_rtx ();
4718                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4719                                  compute_mode, label);
4720                 expand_inc (quotient, const1_rtx);
4721                 expand_dec (remainder, op1);
4722                 emit_label (label);
4723                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4724               }
4725
4726             /* No luck with division elimination or divmod.  Have to do it
4727                by conditionally adjusting op0 *and* the result.  */
4728             {
4729               rtx_code_label *label1, *label2;
4730               rtx adjusted_op0, tem;
4731
4732               quotient = gen_reg_rtx (compute_mode);
4733               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4734               label1 = gen_label_rtx ();
4735               label2 = gen_label_rtx ();
4736               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4737                                compute_mode, label1);
4738               emit_move_insn  (quotient, const0_rtx);
4739               emit_jump_insn (targetm.gen_jump (label2));
4740               emit_barrier ();
4741               emit_label (label1);
4742               expand_dec (adjusted_op0, const1_rtx);
4743               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4744                                   quotient, 1, OPTAB_LIB_WIDEN);
4745               if (tem != quotient)
4746                 emit_move_insn (quotient, tem);
4747               expand_inc (quotient, const1_rtx);
4748               emit_label (label2);
4749             }
4750           }
4751         else /* signed */
4752           {
4753             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4754                 && INTVAL (op1) >= 0)
4755               {
4756                 /* This is extremely similar to the code for the unsigned case
4757                    above.  For 2.7 we should merge these variants, but for
4758                    2.6.1 I don't want to touch the code for unsigned since that
4759                    get used in C.  The signed case will only be used by other
4760                    languages (Ada).  */
4761
4762                 rtx t1, t2, t3;
4763                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4764                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4765                                    floor_log2 (d), tquotient, 0);
4766                 t2 = expand_binop (compute_mode, and_optab, op0,
4767                                    gen_int_mode (d - 1, compute_mode),
4768                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4769                 t3 = gen_reg_rtx (compute_mode);
4770                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4771                                       compute_mode, 1, 1);
4772                 if (t3 == 0)
4773                   {
4774                     rtx_code_label *lab;
4775                     lab = gen_label_rtx ();
4776                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4777                     expand_inc (t1, const1_rtx);
4778                     emit_label (lab);
4779                     quotient = t1;
4780                   }
4781                 else
4782                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4783                                                           t1, t3),
4784                                             tquotient);
4785                 break;
4786               }
4787
4788             /* Try using an instruction that produces both the quotient and
4789                remainder, using truncation.  We can easily compensate the
4790                quotient or remainder to get ceiling rounding, once we have the
4791                remainder.  Notice that we compute also the final remainder
4792                value here, and return the result right away.  */
4793             if (target == 0 || GET_MODE (target) != compute_mode)
4794               target = gen_reg_rtx (compute_mode);
4795             if (rem_flag)
4796               {
4797                 remainder= (REG_P (target)
4798                             ? target : gen_reg_rtx (compute_mode));
4799                 quotient = gen_reg_rtx (compute_mode);
4800               }
4801             else
4802               {
4803                 quotient = (REG_P (target)
4804                             ? target : gen_reg_rtx (compute_mode));
4805                 remainder = gen_reg_rtx (compute_mode);
4806               }
4807
4808             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4809                                      remainder, 0))
4810               {
4811                 /* This could be computed with a branch-less sequence.
4812                    Save that for later.  */
4813                 rtx tem;
4814                 rtx_code_label *label = gen_label_rtx ();
4815                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4816                                  compute_mode, label);
4817                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4818                                     NULL_RTX, 0, OPTAB_WIDEN);
4819                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4820                 expand_inc (quotient, const1_rtx);
4821                 expand_dec (remainder, op1);
4822                 emit_label (label);
4823                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4824               }
4825
4826             /* No luck with division elimination or divmod.  Have to do it
4827                by conditionally adjusting op0 *and* the result.  */
4828             {
4829               rtx_code_label *label1, *label2, *label3, *label4, *label5;
4830               rtx adjusted_op0;
4831               rtx tem;
4832
4833               quotient = gen_reg_rtx (compute_mode);
4834               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4835               label1 = gen_label_rtx ();
4836               label2 = gen_label_rtx ();
4837               label3 = gen_label_rtx ();
4838               label4 = gen_label_rtx ();
4839               label5 = gen_label_rtx ();
4840               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4841               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4842                                compute_mode, label1);
4843               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4844                                   quotient, 0, OPTAB_LIB_WIDEN);
4845               if (tem != quotient)
4846                 emit_move_insn (quotient, tem);
4847               emit_jump_insn (targetm.gen_jump (label5));
4848               emit_barrier ();
4849               emit_label (label1);
4850               expand_dec (adjusted_op0, const1_rtx);
4851               emit_jump_insn (targetm.gen_jump (label4));
4852               emit_barrier ();
4853               emit_label (label2);
4854               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4855                                compute_mode, label3);
4856               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4857                                   quotient, 0, OPTAB_LIB_WIDEN);
4858               if (tem != quotient)
4859                 emit_move_insn (quotient, tem);
4860               emit_jump_insn (targetm.gen_jump (label5));
4861               emit_barrier ();
4862               emit_label (label3);
4863               expand_inc (adjusted_op0, const1_rtx);
4864               emit_label (label4);
4865               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4866                                   quotient, 0, OPTAB_LIB_WIDEN);
4867               if (tem != quotient)
4868                 emit_move_insn (quotient, tem);
4869               expand_inc (quotient, const1_rtx);
4870               emit_label (label5);
4871             }
4872           }
4873         break;
4874
4875       case EXACT_DIV_EXPR:
4876         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4877           {
4878             HOST_WIDE_INT d = INTVAL (op1);
4879             unsigned HOST_WIDE_INT ml;
4880             int pre_shift;
4881             rtx t1;
4882
4883             pre_shift = floor_log2 (d & -d);
4884             ml = invert_mod2n (d >> pre_shift, size);
4885             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4886                                pre_shift, NULL_RTX, unsignedp);
4887             quotient = expand_mult (compute_mode, t1,
4888                                     gen_int_mode (ml, compute_mode),
4889                                     NULL_RTX, 1);
4890
4891             insn = get_last_insn ();
4892             set_dst_reg_note (insn, REG_EQUAL,
4893                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4894                                               compute_mode, op0, op1),
4895                               quotient);
4896           }
4897         break;
4898
4899       case ROUND_DIV_EXPR:
4900       case ROUND_MOD_EXPR:
4901         if (unsignedp)
4902           {
4903             rtx tem;
4904             rtx_code_label *label;
4905             label = gen_label_rtx ();
4906             quotient = gen_reg_rtx (compute_mode);
4907             remainder = gen_reg_rtx (compute_mode);
4908             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4909               {
4910                 rtx tem;
4911                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4912                                          quotient, 1, OPTAB_LIB_WIDEN);
4913                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4914                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4915                                           remainder, 1, OPTAB_LIB_WIDEN);
4916               }
4917             tem = plus_constant (compute_mode, op1, -1);
4918             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4919             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4920             expand_inc (quotient, const1_rtx);
4921             expand_dec (remainder, op1);
4922             emit_label (label);
4923           }
4924         else
4925           {
4926             rtx abs_rem, abs_op1, tem, mask;
4927             rtx_code_label *label;
4928             label = gen_label_rtx ();
4929             quotient = gen_reg_rtx (compute_mode);
4930             remainder = gen_reg_rtx (compute_mode);
4931             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4932               {
4933                 rtx tem;
4934                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4935                                          quotient, 0, OPTAB_LIB_WIDEN);
4936                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4937                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4938                                           remainder, 0, OPTAB_LIB_WIDEN);
4939               }
4940             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4941             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4942             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4943                                 1, NULL_RTX, 1);
4944             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4945             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4946                                 NULL_RTX, 0, OPTAB_WIDEN);
4947             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4948                                  size - 1, NULL_RTX, 0);
4949             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4950                                 NULL_RTX, 0, OPTAB_WIDEN);
4951             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4952                                 NULL_RTX, 0, OPTAB_WIDEN);
4953             expand_inc (quotient, tem);
4954             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4955                                 NULL_RTX, 0, OPTAB_WIDEN);
4956             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4957                                 NULL_RTX, 0, OPTAB_WIDEN);
4958             expand_dec (remainder, tem);
4959             emit_label (label);
4960           }
4961         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4962
4963       default:
4964         gcc_unreachable ();
4965       }
4966
4967   if (quotient == 0)
4968     {
4969       if (target && GET_MODE (target) != compute_mode)
4970         target = 0;
4971
4972       if (rem_flag)
4973         {
4974           /* Try to produce the remainder without producing the quotient.
4975              If we seem to have a divmod pattern that does not require widening,
4976              don't try widening here.  We should really have a WIDEN argument
4977              to expand_twoval_binop, since what we'd really like to do here is
4978              1) try a mod insn in compute_mode
4979              2) try a divmod insn in compute_mode
4980              3) try a div insn in compute_mode and multiply-subtract to get
4981                 remainder
4982              4) try the same things with widening allowed.  */
4983           remainder
4984             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4985                                  op0, op1, target,
4986                                  unsignedp,
4987                                  ((optab_handler (optab2, compute_mode)
4988                                    != CODE_FOR_nothing)
4989                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4990           if (remainder == 0)
4991             {
4992               /* No luck there.  Can we do remainder and divide at once
4993                  without a library call?  */
4994               remainder = gen_reg_rtx (compute_mode);
4995               if (! expand_twoval_binop ((unsignedp
4996                                           ? udivmod_optab
4997                                           : sdivmod_optab),
4998                                          op0, op1,
4999                                          NULL_RTX, remainder, unsignedp))
5000                 remainder = 0;
5001             }
5002
5003           if (remainder)
5004             return gen_lowpart (mode, remainder);
5005         }
5006
5007       /* Produce the quotient.  Try a quotient insn, but not a library call.
5008          If we have a divmod in this mode, use it in preference to widening
5009          the div (for this test we assume it will not fail). Note that optab2
5010          is set to the one of the two optabs that the call below will use.  */
5011       quotient
5012         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5013                              op0, op1, rem_flag ? NULL_RTX : target,
5014                              unsignedp,
5015                              ((optab_handler (optab2, compute_mode)
5016                                != CODE_FOR_nothing)
5017                               ? OPTAB_DIRECT : OPTAB_WIDEN));
5018
5019       if (quotient == 0)
5020         {
5021           /* No luck there.  Try a quotient-and-remainder insn,
5022              keeping the quotient alone.  */
5023           quotient = gen_reg_rtx (compute_mode);
5024           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5025                                      op0, op1,
5026                                      quotient, NULL_RTX, unsignedp))
5027             {
5028               quotient = 0;
5029               if (! rem_flag)
5030                 /* Still no luck.  If we are not computing the remainder,
5031                    use a library call for the quotient.  */
5032                 quotient = sign_expand_binop (compute_mode,
5033                                               udiv_optab, sdiv_optab,
5034                                               op0, op1, target,
5035                                               unsignedp, OPTAB_LIB_WIDEN);
5036             }
5037         }
5038     }
5039
5040   if (rem_flag)
5041     {
5042       if (target && GET_MODE (target) != compute_mode)
5043         target = 0;
5044
5045       if (quotient == 0)
5046         {
5047           /* No divide instruction either.  Use library for remainder.  */
5048           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5049                                          op0, op1, target,
5050                                          unsignedp, OPTAB_LIB_WIDEN);
5051           /* No remainder function.  Try a quotient-and-remainder
5052              function, keeping the remainder.  */
5053           if (!remainder)
5054             {
5055               remainder = gen_reg_rtx (compute_mode);
5056               if (!expand_twoval_binop_libfunc
5057                   (unsignedp ? udivmod_optab : sdivmod_optab,
5058                    op0, op1,
5059                    NULL_RTX, remainder,
5060                    unsignedp ? UMOD : MOD))
5061                 remainder = NULL_RTX;
5062             }
5063         }
5064       else
5065         {
5066           /* We divided.  Now finish doing X - Y * (X / Y).  */
5067           remainder = expand_mult (compute_mode, quotient, op1,
5068                                    NULL_RTX, unsignedp);
5069           remainder = expand_binop (compute_mode, sub_optab, op0,
5070                                     remainder, target, unsignedp,
5071                                     OPTAB_LIB_WIDEN);
5072         }
5073     }
5074
5075   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5076 }
5077 \f
5078 /* Return a tree node with data type TYPE, describing the value of X.
5079    Usually this is an VAR_DECL, if there is no obvious better choice.
5080    X may be an expression, however we only support those expressions
5081    generated by loop.c.  */
5082
5083 tree
5084 make_tree (tree type, rtx x)
5085 {
5086   tree t;
5087
5088   switch (GET_CODE (x))
5089     {
5090     case CONST_INT:
5091     case CONST_WIDE_INT:
5092       t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
5093       return t;
5094
5095     case CONST_DOUBLE:
5096       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5097       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5098         t = wide_int_to_tree (type,
5099                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5100                                                     HOST_BITS_PER_WIDE_INT * 2));
5101       else
5102         t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5103
5104       return t;
5105
5106     case CONST_VECTOR:
5107       {
5108         int units = CONST_VECTOR_NUNITS (x);
5109         tree itype = TREE_TYPE (type);
5110         tree *elts;
5111         int i;
5112
5113         /* Build a tree with vector elements.  */
5114         elts = XALLOCAVEC (tree, units);
5115         for (i = units - 1; i >= 0; --i)
5116           {
5117             rtx elt = CONST_VECTOR_ELT (x, i);
5118             elts[i] = make_tree (itype, elt);
5119           }
5120
5121         return build_vector (type, elts);
5122       }
5123
5124     case PLUS:
5125       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5126                           make_tree (type, XEXP (x, 1)));
5127
5128     case MINUS:
5129       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5130                           make_tree (type, XEXP (x, 1)));
5131
5132     case NEG:
5133       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5134
5135     case MULT:
5136       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5137                           make_tree (type, XEXP (x, 1)));
5138
5139     case ASHIFT:
5140       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5141                           make_tree (type, XEXP (x, 1)));
5142
5143     case LSHIFTRT:
5144       t = unsigned_type_for (type);
5145       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5146                                          make_tree (t, XEXP (x, 0)),
5147                                          make_tree (type, XEXP (x, 1))));
5148
5149     case ASHIFTRT:
5150       t = signed_type_for (type);
5151       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5152                                          make_tree (t, XEXP (x, 0)),
5153                                          make_tree (type, XEXP (x, 1))));
5154
5155     case DIV:
5156       if (TREE_CODE (type) != REAL_TYPE)
5157         t = signed_type_for (type);
5158       else
5159         t = type;
5160
5161       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5162                                          make_tree (t, XEXP (x, 0)),
5163                                          make_tree (t, XEXP (x, 1))));
5164     case UDIV:
5165       t = unsigned_type_for (type);
5166       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5167                                          make_tree (t, XEXP (x, 0)),
5168                                          make_tree (t, XEXP (x, 1))));
5169
5170     case SIGN_EXTEND:
5171     case ZERO_EXTEND:
5172       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5173                                           GET_CODE (x) == ZERO_EXTEND);
5174       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5175
5176     case CONST:
5177       return make_tree (type, XEXP (x, 0));
5178
5179     case SYMBOL_REF:
5180       t = SYMBOL_REF_DECL (x);
5181       if (t)
5182         return fold_convert (type, build_fold_addr_expr (t));
5183       /* else fall through.  */
5184
5185     default:
5186       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5187
5188       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5189          address mode to pointer mode.  */
5190       if (POINTER_TYPE_P (type))
5191         x = convert_memory_address_addr_space
5192               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5193
5194       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5195          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5196       t->decl_with_rtl.rtl = x;
5197
5198       return t;
5199     }
5200 }
5201 \f
5202 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5203    and returning TARGET.
5204
5205    If TARGET is 0, a pseudo-register or constant is returned.  */
5206
5207 rtx
5208 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5209 {
5210   rtx tem = 0;
5211
5212   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5213     tem = simplify_binary_operation (AND, mode, op0, op1);
5214   if (tem == 0)
5215     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5216
5217   if (target == 0)
5218     target = tem;
5219   else if (tem != target)
5220     emit_move_insn (target, tem);
5221   return target;
5222 }
5223
5224 /* Helper function for emit_store_flag.  */
5225 rtx
5226 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5227              machine_mode mode, machine_mode compare_mode,
5228              int unsignedp, rtx x, rtx y, int normalizep,
5229              machine_mode target_mode)
5230 {
5231   struct expand_operand ops[4];
5232   rtx op0, comparison, subtarget;
5233   rtx_insn *last;
5234   machine_mode result_mode = targetm.cstore_mode (icode);
5235
5236   last = get_last_insn ();
5237   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5238   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5239   if (!x || !y)
5240     {
5241       delete_insns_since (last);
5242       return NULL_RTX;
5243     }
5244
5245   if (target_mode == VOIDmode)
5246     target_mode = result_mode;
5247   if (!target)
5248     target = gen_reg_rtx (target_mode);
5249
5250   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5251
5252   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5253   create_fixed_operand (&ops[1], comparison);
5254   create_fixed_operand (&ops[2], x);
5255   create_fixed_operand (&ops[3], y);
5256   if (!maybe_expand_insn (icode, 4, ops))
5257     {
5258       delete_insns_since (last);
5259       return NULL_RTX;
5260     }
5261   subtarget = ops[0].value;
5262
5263   /* If we are converting to a wider mode, first convert to
5264      TARGET_MODE, then normalize.  This produces better combining
5265      opportunities on machines that have a SIGN_EXTRACT when we are
5266      testing a single bit.  This mostly benefits the 68k.
5267
5268      If STORE_FLAG_VALUE does not have the sign bit set when
5269      interpreted in MODE, we can do this conversion as unsigned, which
5270      is usually more efficient.  */
5271   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5272     {
5273       convert_move (target, subtarget,
5274                     val_signbit_known_clear_p (result_mode,
5275                                                STORE_FLAG_VALUE));
5276       op0 = target;
5277       result_mode = target_mode;
5278     }
5279   else
5280     op0 = subtarget;
5281
5282   /* If we want to keep subexpressions around, don't reuse our last
5283      target.  */
5284   if (optimize)
5285     subtarget = 0;
5286
5287   /* Now normalize to the proper value in MODE.  Sometimes we don't
5288      have to do anything.  */
5289   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5290     ;
5291   /* STORE_FLAG_VALUE might be the most negative number, so write
5292      the comparison this way to avoid a compiler-time warning.  */
5293   else if (- normalizep == STORE_FLAG_VALUE)
5294     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5295
5296   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5297      it hard to use a value of just the sign bit due to ANSI integer
5298      constant typing rules.  */
5299   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5300     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5301                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5302                         normalizep == 1);
5303   else
5304     {
5305       gcc_assert (STORE_FLAG_VALUE & 1);
5306
5307       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5308       if (normalizep == -1)
5309         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5310     }
5311
5312   /* If we were converting to a smaller mode, do the conversion now.  */
5313   if (target_mode != result_mode)
5314     {
5315       convert_move (target, op0, 0);
5316       return target;
5317     }
5318   else
5319     return op0;
5320 }
5321
5322
5323 /* A subroutine of emit_store_flag only including "tricks" that do not
5324    need a recursive call.  These are kept separate to avoid infinite
5325    loops.  */
5326
5327 static rtx
5328 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5329                    machine_mode mode, int unsignedp, int normalizep,
5330                    machine_mode target_mode)
5331 {
5332   rtx subtarget;
5333   enum insn_code icode;
5334   machine_mode compare_mode;
5335   enum mode_class mclass;
5336   enum rtx_code scode;
5337
5338   if (unsignedp)
5339     code = unsigned_condition (code);
5340   scode = swap_condition (code);
5341
5342   /* If one operand is constant, make it the second one.  Only do this
5343      if the other operand is not constant as well.  */
5344
5345   if (swap_commutative_operands_p (op0, op1))
5346     {
5347       std::swap (op0, op1);
5348       code = swap_condition (code);
5349     }
5350
5351   if (mode == VOIDmode)
5352     mode = GET_MODE (op0);
5353
5354   /* For some comparisons with 1 and -1, we can convert this to
5355      comparisons with zero.  This will often produce more opportunities for
5356      store-flag insns.  */
5357
5358   switch (code)
5359     {
5360     case LT:
5361       if (op1 == const1_rtx)
5362         op1 = const0_rtx, code = LE;
5363       break;
5364     case LE:
5365       if (op1 == constm1_rtx)
5366         op1 = const0_rtx, code = LT;
5367       break;
5368     case GE:
5369       if (op1 == const1_rtx)
5370         op1 = const0_rtx, code = GT;
5371       break;
5372     case GT:
5373       if (op1 == constm1_rtx)
5374         op1 = const0_rtx, code = GE;
5375       break;
5376     case GEU:
5377       if (op1 == const1_rtx)
5378         op1 = const0_rtx, code = NE;
5379       break;
5380     case LTU:
5381       if (op1 == const1_rtx)
5382         op1 = const0_rtx, code = EQ;
5383       break;
5384     default:
5385       break;
5386     }
5387
5388   /* If we are comparing a double-word integer with zero or -1, we can
5389      convert the comparison into one involving a single word.  */
5390   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5391       && GET_MODE_CLASS (mode) == MODE_INT
5392       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5393     {
5394       rtx tem;
5395       if ((code == EQ || code == NE)
5396           && (op1 == const0_rtx || op1 == constm1_rtx))
5397         {
5398           rtx op00, op01;
5399
5400           /* Do a logical OR or AND of the two words and compare the
5401              result.  */
5402           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5403           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5404           tem = expand_binop (word_mode,
5405                               op1 == const0_rtx ? ior_optab : and_optab,
5406                               op00, op01, NULL_RTX, unsignedp,
5407                               OPTAB_DIRECT);
5408
5409           if (tem != 0)
5410             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5411                                    unsignedp, normalizep);
5412         }
5413       else if ((code == LT || code == GE) && op1 == const0_rtx)
5414         {
5415           rtx op0h;
5416
5417           /* If testing the sign bit, can just test on high word.  */
5418           op0h = simplify_gen_subreg (word_mode, op0, mode,
5419                                       subreg_highpart_offset (word_mode,
5420                                                               mode));
5421           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5422                                  unsignedp, normalizep);
5423         }
5424       else
5425         tem = NULL_RTX;
5426
5427       if (tem)
5428         {
5429           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5430             return tem;
5431           if (!target)
5432             target = gen_reg_rtx (target_mode);
5433
5434           convert_move (target, tem,
5435                         !val_signbit_known_set_p (word_mode,
5436                                                   (normalizep ? normalizep
5437                                                    : STORE_FLAG_VALUE)));
5438           return target;
5439         }
5440     }
5441
5442   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5443      complement of A (for GE) and shifting the sign bit to the low bit.  */
5444   if (op1 == const0_rtx && (code == LT || code == GE)
5445       && GET_MODE_CLASS (mode) == MODE_INT
5446       && (normalizep || STORE_FLAG_VALUE == 1
5447           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5448     {
5449       subtarget = target;
5450
5451       if (!target)
5452         target_mode = mode;
5453
5454       /* If the result is to be wider than OP0, it is best to convert it
5455          first.  If it is to be narrower, it is *incorrect* to convert it
5456          first.  */
5457       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5458         {
5459           op0 = convert_modes (target_mode, mode, op0, 0);
5460           mode = target_mode;
5461         }
5462
5463       if (target_mode != mode)
5464         subtarget = 0;
5465
5466       if (code == GE)
5467         op0 = expand_unop (mode, one_cmpl_optab, op0,
5468                            ((STORE_FLAG_VALUE == 1 || normalizep)
5469                             ? 0 : subtarget), 0);
5470
5471       if (STORE_FLAG_VALUE == 1 || normalizep)
5472         /* If we are supposed to produce a 0/1 value, we want to do
5473            a logical shift from the sign bit to the low-order bit; for
5474            a -1/0 value, we do an arithmetic shift.  */
5475         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5476                             GET_MODE_BITSIZE (mode) - 1,
5477                             subtarget, normalizep != -1);
5478
5479       if (mode != target_mode)
5480         op0 = convert_modes (target_mode, mode, op0, 0);
5481
5482       return op0;
5483     }
5484
5485   mclass = GET_MODE_CLASS (mode);
5486   for (compare_mode = mode; compare_mode != VOIDmode;
5487        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5488     {
5489      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5490      icode = optab_handler (cstore_optab, optab_mode);
5491      if (icode != CODE_FOR_nothing)
5492         {
5493           do_pending_stack_adjust ();
5494           rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5495                                  unsignedp, op0, op1, normalizep, target_mode);
5496           if (tem)
5497             return tem;
5498
5499           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5500             {
5501               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5502                                  unsignedp, op1, op0, normalizep, target_mode);
5503               if (tem)
5504                 return tem;
5505             }
5506           break;
5507         }
5508     }
5509
5510   return 0;
5511 }
5512
5513 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5514    and storing in TARGET.  Normally return TARGET.
5515    Return 0 if that cannot be done.
5516
5517    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5518    it is VOIDmode, they cannot both be CONST_INT.
5519
5520    UNSIGNEDP is for the case where we have to widen the operands
5521    to perform the operation.  It says to use zero-extension.
5522
5523    NORMALIZEP is 1 if we should convert the result to be either zero
5524    or one.  Normalize is -1 if we should convert the result to be
5525    either zero or -1.  If NORMALIZEP is zero, the result will be left
5526    "raw" out of the scc insn.  */
5527
5528 rtx
5529 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5530                  machine_mode mode, int unsignedp, int normalizep)
5531 {
5532   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5533   enum rtx_code rcode;
5534   rtx subtarget;
5535   rtx tem, trueval;
5536   rtx_insn *last;
5537
5538   /* If we compare constants, we shouldn't use a store-flag operation,
5539      but a constant load.  We can get there via the vanilla route that
5540      usually generates a compare-branch sequence, but will in this case
5541      fold the comparison to a constant, and thus elide the branch.  */
5542   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5543     return NULL_RTX;
5544
5545   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5546                            target_mode);
5547   if (tem)
5548     return tem;
5549
5550   /* If we reached here, we can't do this with a scc insn, however there
5551      are some comparisons that can be done in other ways.  Don't do any
5552      of these cases if branches are very cheap.  */
5553   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5554     return 0;
5555
5556   /* See what we need to return.  We can only return a 1, -1, or the
5557      sign bit.  */
5558
5559   if (normalizep == 0)
5560     {
5561       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5562         normalizep = STORE_FLAG_VALUE;
5563
5564       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5565         ;
5566       else
5567         return 0;
5568     }
5569
5570   last = get_last_insn ();
5571
5572   /* If optimizing, use different pseudo registers for each insn, instead
5573      of reusing the same pseudo.  This leads to better CSE, but slows
5574      down the compiler, since there are more pseudos */
5575   subtarget = (!optimize
5576                && (target_mode == mode)) ? target : NULL_RTX;
5577   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5578
5579   /* For floating-point comparisons, try the reverse comparison or try
5580      changing the "orderedness" of the comparison.  */
5581   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5582     {
5583       enum rtx_code first_code;
5584       bool and_them;
5585
5586       rcode = reverse_condition_maybe_unordered (code);
5587       if (can_compare_p (rcode, mode, ccp_store_flag)
5588           && (code == ORDERED || code == UNORDERED
5589               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5590               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5591         {
5592           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5593                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5594
5595           /* For the reverse comparison, use either an addition or a XOR.  */
5596           if (want_add
5597               && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5598                            optimize_insn_for_speed_p ()) == 0)
5599             {
5600               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5601                                        STORE_FLAG_VALUE, target_mode);
5602               if (tem)
5603                 return expand_binop (target_mode, add_optab, tem,
5604                                      gen_int_mode (normalizep, target_mode),
5605                                      target, 0, OPTAB_WIDEN);
5606             }
5607           else if (!want_add
5608                    && rtx_cost (trueval, mode, XOR, 1,
5609                                 optimize_insn_for_speed_p ()) == 0)
5610             {
5611               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5612                                        normalizep, target_mode);
5613               if (tem)
5614                 return expand_binop (target_mode, xor_optab, tem, trueval,
5615                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5616             }
5617         }
5618
5619       delete_insns_since (last);
5620
5621       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5622       if (code == ORDERED || code == UNORDERED)
5623         return 0;
5624
5625       and_them = split_comparison (code, mode, &first_code, &code);
5626
5627       /* If there are no NaNs, the first comparison should always fall through.
5628          Effectively change the comparison to the other one.  */
5629       if (!HONOR_NANS (mode))
5630         {
5631           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5632           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5633                                     target_mode);
5634         }
5635
5636       if (!HAVE_conditional_move)
5637         return 0;
5638
5639       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5640          conditional move.  */
5641       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5642                                normalizep, target_mode);
5643       if (tem == 0)
5644         return 0;
5645
5646       if (and_them)
5647         tem = emit_conditional_move (target, code, op0, op1, mode,
5648                                      tem, const0_rtx, GET_MODE (tem), 0);
5649       else
5650         tem = emit_conditional_move (target, code, op0, op1, mode,
5651                                      trueval, tem, GET_MODE (tem), 0);
5652
5653       if (tem == 0)
5654         delete_insns_since (last);
5655       return tem;
5656     }
5657
5658   /* The remaining tricks only apply to integer comparisons.  */
5659
5660   if (GET_MODE_CLASS (mode) != MODE_INT)
5661     return 0;
5662
5663   /* If this is an equality comparison of integers, we can try to exclusive-or
5664      (or subtract) the two operands and use a recursive call to try the
5665      comparison with zero.  Don't do any of these cases if branches are
5666      very cheap.  */
5667
5668   if ((code == EQ || code == NE) && op1 != const0_rtx)
5669     {
5670       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5671                           OPTAB_WIDEN);
5672
5673       if (tem == 0)
5674         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5675                             OPTAB_WIDEN);
5676       if (tem != 0)
5677         tem = emit_store_flag (target, code, tem, const0_rtx,
5678                                mode, unsignedp, normalizep);
5679       if (tem != 0)
5680         return tem;
5681
5682       delete_insns_since (last);
5683     }
5684
5685   /* For integer comparisons, try the reverse comparison.  However, for
5686      small X and if we'd have anyway to extend, implementing "X != 0"
5687      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5688   rcode = reverse_condition (code);
5689   if (can_compare_p (rcode, mode, ccp_store_flag)
5690       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5691             && code == NE
5692             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5693             && op1 == const0_rtx))
5694     {
5695       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5696                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5697
5698       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5699       if (want_add
5700           && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5701                        optimize_insn_for_speed_p ()) == 0)
5702         {
5703           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5704                                    STORE_FLAG_VALUE, target_mode);
5705           if (tem != 0)
5706             tem = expand_binop (target_mode, add_optab, tem,
5707                                 gen_int_mode (normalizep, target_mode),
5708                                 target, 0, OPTAB_WIDEN);
5709         }
5710       else if (!want_add
5711                && rtx_cost (trueval, mode, XOR, 1,
5712                             optimize_insn_for_speed_p ()) == 0)
5713         {
5714           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5715                                    normalizep, target_mode);
5716           if (tem != 0)
5717             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5718                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5719         }
5720
5721       if (tem != 0)
5722         return tem;
5723       delete_insns_since (last);
5724     }
5725
5726   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5727      the constant zero.  Reject all other comparisons at this point.  Only
5728      do LE and GT if branches are expensive since they are expensive on
5729      2-operand machines.  */
5730
5731   if (op1 != const0_rtx
5732       || (code != EQ && code != NE
5733           && (BRANCH_COST (optimize_insn_for_speed_p (),
5734                            false) <= 1 || (code != LE && code != GT))))
5735     return 0;
5736
5737   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5738      do the necessary operation below.  */
5739
5740   tem = 0;
5741
5742   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5743      the sign bit set.  */
5744
5745   if (code == LE)
5746     {
5747       /* This is destructive, so SUBTARGET can't be OP0.  */
5748       if (rtx_equal_p (subtarget, op0))
5749         subtarget = 0;
5750
5751       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5752                           OPTAB_WIDEN);
5753       if (tem)
5754         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5755                             OPTAB_WIDEN);
5756     }
5757
5758   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5759      number of bits in the mode of OP0, minus one.  */
5760
5761   if (code == GT)
5762     {
5763       if (rtx_equal_p (subtarget, op0))
5764         subtarget = 0;
5765
5766       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5767                           GET_MODE_BITSIZE (mode) - 1,
5768                           subtarget, 0);
5769       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5770                           OPTAB_WIDEN);
5771     }
5772
5773   if (code == EQ || code == NE)
5774     {
5775       /* For EQ or NE, one way to do the comparison is to apply an operation
5776          that converts the operand into a positive number if it is nonzero
5777          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5778          for NE we negate.  This puts the result in the sign bit.  Then we
5779          normalize with a shift, if needed.
5780
5781          Two operations that can do the above actions are ABS and FFS, so try
5782          them.  If that doesn't work, and MODE is smaller than a full word,
5783          we can use zero-extension to the wider mode (an unsigned conversion)
5784          as the operation.  */
5785
5786       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5787          that is compensated by the subsequent overflow when subtracting
5788          one / negating.  */
5789
5790       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5791         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5792       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5793         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5794       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5795         {
5796           tem = convert_modes (word_mode, mode, op0, 1);
5797           mode = word_mode;
5798         }
5799
5800       if (tem != 0)
5801         {
5802           if (code == EQ)
5803             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5804                                 0, OPTAB_WIDEN);
5805           else
5806             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5807         }
5808
5809       /* If we couldn't do it that way, for NE we can "or" the two's complement
5810          of the value with itself.  For EQ, we take the one's complement of
5811          that "or", which is an extra insn, so we only handle EQ if branches
5812          are expensive.  */
5813
5814       if (tem == 0
5815           && (code == NE
5816               || BRANCH_COST (optimize_insn_for_speed_p (),
5817                               false) > 1))
5818         {
5819           if (rtx_equal_p (subtarget, op0))
5820             subtarget = 0;
5821
5822           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5823           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5824                               OPTAB_WIDEN);
5825
5826           if (tem && code == EQ)
5827             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5828         }
5829     }
5830
5831   if (tem && normalizep)
5832     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5833                         GET_MODE_BITSIZE (mode) - 1,
5834                         subtarget, normalizep == 1);
5835
5836   if (tem)
5837     {
5838       if (!target)
5839         ;
5840       else if (GET_MODE (tem) != target_mode)
5841         {
5842           convert_move (target, tem, 0);
5843           tem = target;
5844         }
5845       else if (!subtarget)
5846         {
5847           emit_move_insn (target, tem);
5848           tem = target;
5849         }
5850     }
5851   else
5852     delete_insns_since (last);
5853
5854   return tem;
5855 }
5856
5857 /* Like emit_store_flag, but always succeeds.  */
5858
5859 rtx
5860 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5861                        machine_mode mode, int unsignedp, int normalizep)
5862 {
5863   rtx tem;
5864   rtx_code_label *label;
5865   rtx trueval, falseval;
5866
5867   /* First see if emit_store_flag can do the job.  */
5868   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5869   if (tem != 0)
5870     return tem;
5871
5872   if (!target)
5873     target = gen_reg_rtx (word_mode);
5874
5875   /* If this failed, we have to do this with set/compare/jump/set code.
5876      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5877   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5878   if (code == NE
5879       && GET_MODE_CLASS (mode) == MODE_INT
5880       && REG_P (target)
5881       && op0 == target
5882       && op1 == const0_rtx)
5883     {
5884       label = gen_label_rtx ();
5885       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5886                                NULL_RTX, NULL, label, -1);
5887       emit_move_insn (target, trueval);
5888       emit_label (label);
5889       return target;
5890     }
5891
5892   if (!REG_P (target)
5893       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5894     target = gen_reg_rtx (GET_MODE (target));
5895
5896   /* Jump in the right direction if the target cannot implement CODE
5897      but can jump on its reverse condition.  */
5898   falseval = const0_rtx;
5899   if (! can_compare_p (code, mode, ccp_jump)
5900       && (! FLOAT_MODE_P (mode)
5901           || code == ORDERED || code == UNORDERED
5902           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5903           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5904     {
5905       enum rtx_code rcode;
5906       if (FLOAT_MODE_P (mode))
5907         rcode = reverse_condition_maybe_unordered (code);
5908       else
5909         rcode = reverse_condition (code);
5910
5911       /* Canonicalize to UNORDERED for the libcall.  */
5912       if (can_compare_p (rcode, mode, ccp_jump)
5913           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5914         {
5915           falseval = trueval;
5916           trueval = const0_rtx;
5917           code = rcode;
5918         }
5919     }
5920
5921   emit_move_insn (target, trueval);
5922   label = gen_label_rtx ();
5923   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5924                            label, -1);
5925
5926   emit_move_insn (target, falseval);
5927   emit_label (label);
5928
5929   return target;
5930 }
5931 \f
5932 /* Perform possibly multi-word comparison and conditional jump to LABEL
5933    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5934    now a thin wrapper around do_compare_rtx_and_jump.  */
5935
5936 static void
5937 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5938                  rtx_code_label *label)
5939 {
5940   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5941   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
5942                            NULL, label, -1);
5943 }