gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2016 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "backend.h"
  26 #include "target.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "predict.h"
  30 #include "tm_p.h"
  31 #include "expmed.h"
  32 #include "optabs.h"
  33 #include "emit-rtl.h"
  34 #include "diagnostic-core.h"
  35 #include "fold-const.h"
  36 #include "stor-layout.h"
  37 #include "dojump.h"
  38 #include "explow.h"
  39 #include "expr.h"
  40 #include "langhooks.h"
  41
     /* Default instance of struct target_expmed.  When SWITCHABLE_TARGET is
        nonzero, this_target_expmed is also defined and initially points at
        this default instance.  */
  42 struct target_expmed default_target_expmed;
  43 #if SWITCHABLE_TARGET
  44 struct target_expmed *this_target_expmed = &default_target_expmed;
  45 #endif
  46
     /* Forward declarations of file-local helpers: bit-field store and
        extract routines, a compare-and-jump helper, and the power-of-two
        signed division/modulus expanders.  NOTE(review): their definitions
        are presumably later in this file; most are not visible from this
        part of it.  */
  47 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  48                                    unsigned HOST_WIDE_INT,
  49                                    unsigned HOST_WIDE_INT,
  50                                    unsigned HOST_WIDE_INT,
  51                                    rtx, bool);
  52 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
  53                                      unsigned HOST_WIDE_INT,
  54                                      rtx, bool);
  55 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  56                                    unsigned HOST_WIDE_INT,
  57                                    unsigned HOST_WIDE_INT,
  58                                    unsigned HOST_WIDE_INT,
  59                                    rtx, bool);
  60 static rtx extract_fixed_bit_field (machine_mode, rtx,
  61                                     unsigned HOST_WIDE_INT,
  62                                     unsigned HOST_WIDE_INT, rtx, int, bool);
  63 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
  64                                       unsigned HOST_WIDE_INT,
  65                                       unsigned HOST_WIDE_INT, rtx, int, bool);
  66 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
  67 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  68                                     unsigned HOST_WIDE_INT, int, bool);
  69 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
  70 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  71 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  72
  73 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  74    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  75    The mask is truncated if necessary to the width of mode MODE.  The
  76    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
  77
  78 static inline rtx
  79 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
  80 {
  81   return immed_wide_int_const
  82     (wi::shifted_mask (bitpos, bitsize, complement,
  83                        GET_MODE_PRECISION (mode)), mode);
  84 }
  85
  86 /* Test whether a value is zero or a power of two.
        X appears twice in the expansion, so it should not have side
        effects.  */
  87 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  88   (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
  89
  90 struct init_expmed_rtl
  91 {
  92   rtx reg;
  93   rtx plus;
  94   rtx neg;
  95   rtx mult;
  96   rtx sdiv;
  97   rtx udiv;
  98   rtx sdiv_32;
  99   rtx smod_32;
 100   rtx wide_mult;
 101   rtx wide_lshr;
 102   rtx wide_trunc;
 103   rtx shift;
 104   rtx shift_mult;
 105   rtx shift_add;
 106   rtx shift_sub0;
 107   rtx shift_sub1;
 108   rtx zext;
 109   rtx trunc;
 110
 111   rtx pow2[MAX_BITS_PER_WORD];
 112   rtx cint[MAX_BITS_PER_WORD];
 113 };
 114
 115 static void
 116 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
 117                       machine_mode from_mode, bool speed)
 118 {
 119   int to_size, from_size;
 120   rtx which;
 121
 122   to_size = GET_MODE_PRECISION (to_mode);
 123   from_size = GET_MODE_PRECISION (from_mode);
 124
 125   /* Most partial integers have a precision less than the "full"
 126      integer it requires for storage.  In case one doesn't, for
 127      comparison purposes here, reduce the bit size by one in that
 128      case.  */
 129   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 130       && exact_log2 (to_size) != -1)
 131     to_size --;
 132   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 133       && exact_log2 (from_size) != -1)
 134     from_size --;
 135
 136   /* Assume cost of zero-extend and sign-extend is the same.  */
 137   which = (to_size < from_size ? all->trunc : all->zext);
 138
 139   PUT_MODE (all->reg, from_mode);
 140   set_convert_cost (to_mode, from_mode, speed,
 141                     set_src_cost (which, to_mode, speed));
 142 }
 143
 144 static void
 145 init_expmed_one_mode (struct init_expmed_rtl *all,
 146                       machine_mode mode, int speed)
 147 {
 148   int m, n, mode_bitsize;
 149   machine_mode mode_from;
 150
 151   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 152
 153   PUT_MODE (all->reg, mode);
 154   PUT_MODE (all->plus, mode);
 155   PUT_MODE (all->neg, mode);
 156   PUT_MODE (all->mult, mode);
 157   PUT_MODE (all->sdiv, mode);
 158   PUT_MODE (all->udiv, mode);
 159   PUT_MODE (all->sdiv_32, mode);
 160   PUT_MODE (all->smod_32, mode);
 161   PUT_MODE (all->wide_trunc, mode);
 162   PUT_MODE (all->shift, mode);
 163   PUT_MODE (all->shift_mult, mode);
 164   PUT_MODE (all->shift_add, mode);
 165   PUT_MODE (all->shift_sub0, mode);
 166   PUT_MODE (all->shift_sub1, mode);
 167   PUT_MODE (all->zext, mode);
 168   PUT_MODE (all->trunc, mode);
 169
 170   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
 171   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
 172   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
 173   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
 174   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
 175
 176   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
 177                                      <= 2 * add_cost (speed, mode)));
 178   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
 179                                      <= 4 * add_cost (speed, mode)));
 180
 181   set_shift_cost (speed, mode, 0, 0);
 182   {
 183     int cost = add_cost (speed, mode);
 184     set_shiftadd_cost (speed, mode, 0, cost);
 185     set_shiftsub0_cost (speed, mode, 0, cost);
 186     set_shiftsub1_cost (speed, mode, 0, cost);
 187   }
 188
 189   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 190   for (m = 1; m < n; m++)
 191     {
 192       XEXP (all->shift, 1) = all->cint[m];
 193       XEXP (all->shift_mult, 1) = all->pow2[m];
 194
 195       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
 196       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
 197                                                        speed));
 198       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
 199                                                         speed));
 200       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
 201                                                         speed));
 202     }
 203
 204   if (SCALAR_INT_MODE_P (mode))
 205     {
 206       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 207            mode_from = (machine_mode)(mode_from + 1))
 208         init_expmed_one_conv (all, mode, mode_from, speed);
 209     }
 210   if (GET_MODE_CLASS (mode) == MODE_INT)
 211     {
 212       machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
 213       if (wider_mode != VOIDmode)
 214         {
 215           PUT_MODE (all->zext, wider_mode);
 216           PUT_MODE (all->wide_mult, wider_mode);
 217           PUT_MODE (all->wide_lshr, wider_mode);
 218           XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
 219
 220           set_mul_widen_cost (speed, wider_mode,
 221                               set_src_cost (all->wide_mult, wider_mode, speed));
 222           set_mul_highpart_cost (speed, mode,
 223                                  set_src_cost (all->wide_trunc, mode, speed));
 224         }
 225     }
 226 }
 227
 228 void
 229 init_expmed (void)
 230 {
 231   struct init_expmed_rtl all;
 232   machine_mode mode = QImode;
 233   int m, speed;
 234
 235   memset (&all, 0, sizeof all);
 236   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 237     {
 238       all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 239       all.cint[m] = GEN_INT (m);
 240     }
 241
 242   /* Avoid using hard regs in ways which may be unsupported.  */
 243   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 244   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 245   all.neg = gen_rtx_NEG (mode, all.reg);
 246   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 247   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 248   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 249   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 250   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 251   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 252   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 253   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 254   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 255   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 256   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 257   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 258   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 259   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 260   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 261
 262   for (speed = 0; speed < 2; speed++)
 263     {
 264       crtl->maybe_hot_insn_p = speed;
 265       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
 266
 267       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 268            mode = (machine_mode)(mode + 1))
 269         init_expmed_one_mode (&all, mode, speed);
 270
 271       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 272         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 273              mode = (machine_mode)(mode + 1))
 274           init_expmed_one_mode (&all, mode, speed);
 275
 276       if (MIN_MODE_VECTOR_INT != VOIDmode)
 277         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 278              mode = (machine_mode)(mode + 1))
 279           init_expmed_one_mode (&all, mode, speed);
 280     }
 281
 282   if (alg_hash_used_p ())
 283     {
 284       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 285       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 286     }
 287   else
 288     set_alg_hash_used_p (true);
 289   default_rtl_profile ();
 290
 291   ggc_free (all.trunc);
 292   ggc_free (all.shift_sub1);
 293   ggc_free (all.shift_sub0);
 294   ggc_free (all.shift_add);
 295   ggc_free (all.shift_mult);
 296   ggc_free (all.shift);
 297   ggc_free (all.wide_trunc);
 298   ggc_free (all.wide_lshr);
 299   ggc_free (all.wide_mult);
 300   ggc_free (all.zext);
 301   ggc_free (all.smod_32);
 302   ggc_free (all.sdiv_32);
 303   ggc_free (all.udiv);
 304   ggc_free (all.sdiv);
 305   ggc_free (all.mult);
 306   ggc_free (all.neg);
 307   ggc_free (all.plus);
 308   ggc_free (all.reg);
 309 }
 310
 311 /* Return an rtx representing minus the value of X.
 312    MODE is the intended mode of the result,
 313    useful if X is a CONST_INT.  */
 314
 315 rtx
 316 negate_rtx (machine_mode mode, rtx x)
 317 {
 318   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 319
 320   if (result == 0)
 321     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 322
 323   return result;
 324 }
 325
 326 /* Whether reverse storage order is supported on the target.  */
 327 static int reverse_storage_order_supported = -1;
 328
 329 /* Check whether reverse storage order is supported on the target.  */
 330
 331 static void
 332 check_reverse_storage_order_support (void)
 333 {
 334   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 335     {
 336       reverse_storage_order_supported = 0;
 337       sorry ("reverse scalar storage order");
 338     }
 339   else
 340     reverse_storage_order_supported = 1;
 341 }
 342
 343 /* Whether reverse FP storage order is supported on the target.  */
 344 static int reverse_float_storage_order_supported = -1;
 345
 346 /* Check whether reverse FP storage order is supported on the target.  */
 347
 348 static void
 349 check_reverse_float_storage_order_support (void)
 350 {
 351   if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 352     {
 353       reverse_float_storage_order_supported = 0;
 354       sorry ("reverse floating-point scalar storage order");
 355     }
 356   else
 357     reverse_float_storage_order_supported = 1;
 358 }
 359
 360 /* Return an rtx representing value of X with reverse storage order.
 361    MODE is the intended mode of the result,
 362    useful if X is a CONST_INT.  */
 363
 364 rtx
 365 flip_storage_order (enum machine_mode mode, rtx x)
 366 {
 367   enum machine_mode int_mode;
 368   rtx result;
 369
 370   if (mode == QImode)
 371     return x;
 372
 373   if (COMPLEX_MODE_P (mode))
 374     {
 375       rtx real = read_complex_part (x, false);
 376       rtx imag = read_complex_part (x, true);
 377
 378       real = flip_storage_order (GET_MODE_INNER (mode), real);
 379       imag = flip_storage_order (GET_MODE_INNER (mode), imag);
 380
 381       return gen_rtx_CONCAT (mode, real, imag);
 382     }
 383
 384   if (__builtin_expect (reverse_storage_order_supported < 0, 0))
 385     check_reverse_storage_order_support ();
 386
 387   if (SCALAR_INT_MODE_P (mode))
 388     int_mode = mode;
 389   else
 390     {
 391       if (FLOAT_MODE_P (mode)
 392           && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
 393         check_reverse_float_storage_order_support ();
 394
 395       int_mode = mode_for_size (GET_MODE_PRECISION (mode), MODE_INT, 0);
 396       if (int_mode == BLKmode)
 397         {
 398           sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
 399           return x;
 400         }
 401       x = gen_lowpart (int_mode, x);
 402     }
 403
 404   result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
 405   if (result == 0)
 406     result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
 407
 408   if (int_mode != mode)
 409     result = gen_lowpart (mode, result);
 410
 411   return result;
 412 }
 413
 414 /* Adjust bitfield memory MEM so that it points to the first unit of mode
 415    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
 416    If MODE is BLKmode, return a reference to every byte in the bitfield.
 417    Set *NEW_BITNUM to the bit position of the field within the new memory.  */
 418
 419 static rtx
 420 narrow_bit_field_mem (rtx mem, machine_mode mode,
 421                       unsigned HOST_WIDE_INT bitsize,
 422                       unsigned HOST_WIDE_INT bitnum,
 423                       unsigned HOST_WIDE_INT *new_bitnum)
 424 {
 425   if (mode == BLKmode)
 426     {
 427       *new_bitnum = bitnum % BITS_PER_UNIT;
 428       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 429       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 430                             / BITS_PER_UNIT);
 431       return adjust_bitfield_address_size (mem, mode, offset, size);
 432     }
 433   else
 434     {
 435       unsigned int unit = GET_MODE_BITSIZE (mode);
 436       *new_bitnum = bitnum % unit;
 437       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 438       return adjust_bitfield_address (mem, mode, offset);
 439     }
 440 }
 441
 442 /* The caller wants to perform insertion or extraction PATTERN on a
 443    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 444    BITREGION_START and BITREGION_END are as for store_bit_field
 445    and FIELDMODE is the natural mode of the field.
 446
 447    Search for a mode that is compatible with the memory access
 448    restrictions and (where applicable) with a register insertion or
 449    extraction.  Return the new memory on success, storing the adjusted
 450    bit position in *NEW_BITNUM.  Return null otherwise.  */
 451
 452 static rtx
 453 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 454                               rtx op0, HOST_WIDE_INT bitsize,
 455                               HOST_WIDE_INT bitnum,
 456                               unsigned HOST_WIDE_INT bitregion_start,
 457                               unsigned HOST_WIDE_INT bitregion_end,
 458                               machine_mode fieldmode,
 459                               unsigned HOST_WIDE_INT *new_bitnum)
 460 {
 461   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 462                                 bitregion_end, MEM_ALIGN (op0),
 463                                 MEM_VOLATILE_P (op0));
 464   machine_mode best_mode;
 465   if (iter.next_mode (&best_mode))
 466     {
 467       /* We can use a memory in BEST_MODE.  See whether this is true for
 468          any wider modes.  All other things being equal, we prefer to
 469          use the widest mode possible because it tends to expose more
 470          CSE opportunities.  */
 471       if (!iter.prefer_smaller_modes ())
 472         {
 473           /* Limit the search to the mode required by the corresponding
 474              register insertion or extraction instruction, if any.  */
 475           machine_mode limit_mode = word_mode;
 476           extraction_insn insn;
 477           if (get_best_reg_extraction_insn (&insn, pattern,
 478                                             GET_MODE_BITSIZE (best_mode),
 479                                             fieldmode))
 480             limit_mode = insn.field_mode;
 481
 482           machine_mode wider_mode;
 483           while (iter.next_mode (&wider_mode)
 484                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 485             best_mode = wider_mode;
 486         }
 487       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 488                                    new_bitnum);
 489     }
 490   return NULL_RTX;
 491 }
 492
 493 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 494    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 495    offset is then BITNUM / BITS_PER_UNIT.  */
 496
 497 static bool
 498 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
 499                      unsigned HOST_WIDE_INT bitsize,
 500                      machine_mode struct_mode)
 501 {
 502   if (BYTES_BIG_ENDIAN)
 503     return (bitnum % BITS_PER_UNIT == 0
 504             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
 505                 || (bitnum + bitsize) % BITS_PER_WORD == 0));
 506   else
 507     return bitnum % BITS_PER_WORD == 0;
 508 }
 509
 510 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 511    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 512    Return false if the access would touch memory outside the range
 513    BITREGION_START to BITREGION_END for conformance to the C++ memory
 514    model.  */
 515
 516 static bool
 517 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 518                             unsigned HOST_WIDE_INT bitnum,
 519                             machine_mode fieldmode,
 520                             unsigned HOST_WIDE_INT bitregion_start,
 521                             unsigned HOST_WIDE_INT bitregion_end)
 522 {
 523   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 524
 525   /* -fstrict-volatile-bitfields must be enabled and we must have a
 526      volatile MEM.  */
 527   if (!MEM_P (op0)
 528       || !MEM_VOLATILE_P (op0)
 529       || flag_strict_volatile_bitfields <= 0)
 530     return false;
 531
 532   /* Non-integral modes likely only happen with packed structures.
 533      Punt.  */
 534   if (!SCALAR_INT_MODE_P (fieldmode))
 535     return false;
 536
 537   /* The bit size must not be larger than the field mode, and
 538      the field mode must not be larger than a word.  */
 539   if (bitsize > modesize || modesize > BITS_PER_WORD)
 540     return false;
 541
 542   /* Check for cases of unaligned fields that must be split.  */
 543   if (bitnum % modesize + bitsize > modesize)
 544     return false;
 545
 546   /* The memory must be sufficiently aligned for a MODESIZE access.
 547      This condition guarantees, that the memory access will not
 548      touch anything after the end of the structure.  */
 549   if (MEM_ALIGN (op0) < modesize)
 550     return false;
 551
 552   /* Check for cases where the C++ memory model applies.  */
 553   if (bitregion_end != 0
 554       && (bitnum - bitnum % modesize < bitregion_start
 555           || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
 556     return false;
 557
 558   return true;
 559 }
 560
 561 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 562    bit number BITNUM can be treated as a simple value of mode MODE.  */
 563
 564 static bool
 565 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 566                        unsigned HOST_WIDE_INT bitnum, machine_mode mode)
 567 {
 568   return (MEM_P (op0)
 569           && bitnum % BITS_PER_UNIT == 0
 570           && bitsize == GET_MODE_BITSIZE (mode)
 571           && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
 572               || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
 573                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 574 }
 575 \f
 576 /* Try to use instruction INSV to store VALUE into a field of OP0.
 577    BITSIZE and BITNUM are as for store_bit_field.

        Return true if the instruction was emitted.  Return false without
        emitting anything if BITSIZE is zero or larger than the size of
        INSV's field mode.  Also return false, after deleting every insn
        emitted since entry, if INSV could not be expanded.  */
 578
 579 static bool
 580 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 581                             unsigned HOST_WIDE_INT bitsize,
 582                             unsigned HOST_WIDE_INT bitnum,
 583                             rtx value)
 584 {
 585   struct expand_operand ops[4];
 586   rtx value1;
 587   rtx xop0 = op0;
       /* Remember where the insn stream ends now so that a failed attempt
          can be undone with delete_insns_since.  */
 588   rtx_insn *last = get_last_insn ();
 589   bool copy_back = false;
 590
 591   machine_mode op_mode = insv->field_mode;
 592   unsigned int unit = GET_MODE_BITSIZE (op_mode);
       /* Give up on an empty field or one wider than INSV's field mode.  */
 593   if (bitsize == 0 || bitsize > unit)
 594     return false;
 595
 596   if (MEM_P (xop0))
 597     /* Get a reference to the first byte of the field.  */
 598     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 599                                  &bitnum);
 600   else
 601     {
 602       /* Convert from counting within OP0 to counting in OP_MODE.  */
 603       if (BYTES_BIG_ENDIAN)
 604         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 605
 606       /* If xop0 is a register, we need it in OP_MODE
 607          to make it acceptable to the format of insv.  */
 608       if (GET_CODE (xop0) == SUBREG)
 609         /* We can't just change the mode, because this might clobber op0,
 610            and we will need the original value of op0 if insv fails.  */
 611         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 612       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 613         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 614     }
 615
 616   /* If the destination is a paradoxical subreg such that we need a
 617      truncate to the inner mode, perform the insertion on a temporary and
 618      truncate the result to the original destination.  Note that we can't
 619      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 620      X) 0)) is (reg:N X).  */
 621   if (GET_CODE (xop0) == SUBREG
 622       && REG_P (SUBREG_REG (xop0))
 623       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 624                                          op_mode))
 625     {
 626       rtx tem = gen_reg_rtx (op_mode);
 627       emit_move_insn (tem, xop0);
 628       xop0 = tem;
 629       copy_back = true;
 630     }
 631
 632   /* There is a similar overflow check at the start of store_bit_field_1,
 633      but it only handles the case where the field lies completely
 634      outside the register.  The field can also lie only partially in
 635      the register, in which case we need to reduce BITSIZE to the part
 636      that fits.  Without this fix, pr48335-2.c on big-endian breaks on
 637      architectures that support bit-insert instructions, such as arm
 638      and aarch64.  */
 639   if (bitsize + bitnum > unit && bitnum < unit)
 640     {
 641       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
 642                "destination object, data truncated into %wu-bit",
 643                bitsize, unit - bitnum);
 644       bitsize = unit - bitnum;
 645     }
 646
 647   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 648      "backwards" from the size of the unit we are inserting into.
 649      Otherwise, we count bits from the most significant on a
 650      BYTES/BITS_BIG_ENDIAN machine.  */
 651
 652   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 653     bitnum = unit - bitsize - bitnum;
 654
 655   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 656   value1 = value;
 657   if (GET_MODE (value) != op_mode)
 658     {
 659       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 660         {
 661           rtx tmp;
 662           /* Optimization: Don't bother really extending VALUE
 663              if it has all the bits we will actually use.  However,
 664              if we must narrow it, be sure we do it correctly.  */
 665
 666           if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 667             {
 668               tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 669               if (! tmp)
 670                 tmp = simplify_gen_subreg (op_mode,
 671                                            force_reg (GET_MODE (value),
 672                                                       value1),
 673                                            GET_MODE (value), 0);
 674             }
 675           else
 676             {
 677               tmp = gen_lowpart_if_possible (op_mode, value1);
 678               if (! tmp)
 679                 tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value),
 680                                                        value1));
 681             }
 682           value1 = tmp;
 683         }
 684       else if (CONST_INT_P (value))
 685         value1 = gen_int_mode (INTVAL (value), op_mode);
 686       else
 687         /* Parse phase is supposed to make VALUE's data type
 688            match that of the component reference, which is a type
 689            at least as wide as the field; so VALUE should have
 690            a mode that corresponds to that type.  */
 691         gcc_assert (CONSTANT_P (value));
 692     }
 693
       /* Operands handed to INSV: the destination, the field size, the
          field position and the value to insert.  */
 694   create_fixed_operand (&ops[0], xop0);
 695   create_integer_operand (&ops[1], bitsize);
 696   create_integer_operand (&ops[2], bitnum);
 697   create_input_operand (&ops[3], value1, op_mode);
 698   if (maybe_expand_insn (insv->icode, 4, ops))
 699     {
           /* The insertion was done on a temporary; copy the result back
              to the real destination.  */
 700       if (copy_back)
 701         convert_move (op0, xop0, true);
 702       return true;
 703     }
       /* INSV could not be used; discard everything emitted above.  */
 704   delete_insns_since (last);
 705   return false;
 706 }
 707
 708 /* A subroutine of store_bit_field, with the same arguments.  Return true
 709    if the operation could be implemented.
 710
 711    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 712    no other way of implementing the operation.  If FALLBACK_P is false,
 713    return false instead.  */
 714
 715 static bool
 716 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 717                    unsigned HOST_WIDE_INT bitnum,
 718                    unsigned HOST_WIDE_INT bitregion_start,
 719                    unsigned HOST_WIDE_INT bitregion_end,
 720                    machine_mode fieldmode,
 721                    rtx value, bool reverse, bool fallback_p)
 722 {
 723   rtx op0 = str_rtx;
 724   rtx orig_value;
 725
 726   while (GET_CODE (op0) == SUBREG)
 727     {
 728       /* The following line once was done only if WORDS_BIG_ENDIAN,
 729          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 730          meaningful at a much higher level; when structures are copied
 731          between memory and regs, the higher-numbered regs
 732          always get higher addresses.  */
 733       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 734       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 735       int byte_offset = 0;
 736
 737       /* Paradoxical subregs need special handling on big-endian machines.  */
 738       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 739         {
 740           int difference = inner_mode_size - outer_mode_size;
 741
 742           if (WORDS_BIG_ENDIAN)
 743             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 744           if (BYTES_BIG_ENDIAN)
 745             byte_offset += difference % UNITS_PER_WORD;
 746         }
 747       else
 748         byte_offset = SUBREG_BYTE (op0);
 749
 750       bitnum += byte_offset * BITS_PER_UNIT;
 751       op0 = SUBREG_REG (op0);
 752     }
 753
 754   /* No action is needed if the target is a register and if the field
 755      lies completely outside that register.  This can occur if the source
 756      code contains an out-of-bounds access to a small array.  */
 757   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 758     return true;
 759
 760   /* Use vec_set patterns for inserting parts of vectors whenever
 761      available.  */
 762   if (VECTOR_MODE_P (GET_MODE (op0))
 763       && !MEM_P (op0)
 764       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 765       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 766       && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
 767       && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
 768     {
 769       struct expand_operand ops[3];
 770       machine_mode outermode = GET_MODE (op0);
 771       machine_mode innermode = GET_MODE_INNER (outermode);
 772       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 773       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 774
 775       create_fixed_operand (&ops[0], op0);
 776       create_input_operand (&ops[1], value, innermode);
 777       create_integer_operand (&ops[2], pos);
 778       if (maybe_expand_insn (icode, 3, ops))
 779         return true;
 780     }
 781
 782   /* If the target is a register, overwriting the entire object, or storing
 783      a full-word or multi-word field can be done with just a SUBREG.  */
 784   if (!MEM_P (op0)
 785       && bitsize == GET_MODE_BITSIZE (fieldmode)
 786       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 787           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 788     {
 789       /* Use the subreg machinery either to narrow OP0 to the required
 790          words or to cope with mode punning between equal-sized modes.
 791          In the latter case, use subreg on the rhs side, not lhs.  */
 792       rtx sub;
 793
 794       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 795         {
 796           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 797           if (sub)
 798             {
 799               if (reverse)
 800                 sub = flip_storage_order (GET_MODE (op0), sub);
 801               emit_move_insn (op0, sub);
 802               return true;
 803             }
 804         }
 805       else
 806         {
 807           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 808                                      bitnum / BITS_PER_UNIT);
 809           if (sub)
 810             {
 811               if (reverse)
 812                 value = flip_storage_order (fieldmode, value);
 813               emit_move_insn (sub, value);
 814               return true;
 815             }
 816         }
 817     }
 818
 819   /* If the target is memory, storing any naturally aligned field can be
 820      done with a simple store.  For targets that support fast unaligned
 821      memory, any naturally sized, unit aligned field can be done directly.  */
 822   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
 823     {
 824       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 825       if (reverse)
 826         value = flip_storage_order (fieldmode, value);
 827       emit_move_insn (op0, value);
 828       return true;
 829     }
 830
 831   /* Make sure we are playing with integral modes.  Pun with subregs
 832      if we aren't.  This must come after the entire register case above,
 833      since that case is valid for any mode.  The following cases are only
 834      valid for integral modes.  */
 835   {
 836     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 837     if (imode != GET_MODE (op0))
 838       {
 839         if (MEM_P (op0))
 840           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
 841         else
 842           {
 843             gcc_assert (imode != BLKmode);
 844             op0 = gen_lowpart (imode, op0);
 845           }
 846       }
 847   }
 848
 849   /* Storing an lsb-aligned field in a register
 850      can be done with a movstrict instruction.  */
 851
 852   if (!MEM_P (op0)
 853       && !reverse
 854       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 855       && bitsize == GET_MODE_BITSIZE (fieldmode)
 856       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 857     {
 858       struct expand_operand ops[2];
 859       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 860       rtx arg0 = op0;
 861       unsigned HOST_WIDE_INT subreg_off;
 862
 863       if (GET_CODE (arg0) == SUBREG)
 864         {
 865           /* Else we've got some float mode source being extracted into
 866              a different float mode destination -- this combination of
 867              subregs results in Severe Tire Damage.  */
 868           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 869                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 870                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 871           arg0 = SUBREG_REG (arg0);
 872         }
 873
 874       subreg_off = bitnum / BITS_PER_UNIT;
 875       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 876         {
 877           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 878
 879           create_fixed_operand (&ops[0], arg0);
 880           /* Shrink the source operand to FIELDMODE.  */
 881           create_convert_operand_to (&ops[1], value, fieldmode, false);
 882           if (maybe_expand_insn (icode, 2, ops))
 883             return true;
 884         }
 885     }
 886
 887   /* Handle fields bigger than a word.  */
 888
 889   if (bitsize > BITS_PER_WORD)
 890     {
 891       /* Here we transfer the words of the field
 892          in the order least significant first.
 893          This is because the most significant word is the one which may
 894          be less than full.
 895          However, only do that if the value is not BLKmode.  */
 896
 897       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 898       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 899       unsigned int i;
 900       rtx_insn *last;
 901
 902       /* This is the mode we must force value to, so that there will be enough
 903          subwords to extract.  Note that fieldmode will often (always?) be
 904          VOIDmode, because that is what store_field uses to indicate that this
 905          is a bit field, but passing VOIDmode to operand_subword_force
 906          is not allowed.  */
 907       fieldmode = GET_MODE (value);
 908       if (fieldmode == VOIDmode)
 909         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 910
 911       last = get_last_insn ();
 912       for (i = 0; i < nwords; i++)
 913         {
 914           /* If I is 0, use the low-order word in both field and target;
 915              if I is 1, use the next to lowest word; and so on.  */
 916           unsigned int wordnum = (backwards
 917                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 918                                   - i - 1
 919                                   : i);
 920           unsigned int bit_offset = (backwards ^ reverse
 921                                      ? MAX ((int) bitsize - ((int) i + 1)
 922                                             * BITS_PER_WORD,
 923                                             0)
 924                                      : (int) i * BITS_PER_WORD);
 925           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 926           unsigned HOST_WIDE_INT new_bitsize =
 927             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 928
 929           /* If the remaining chunk doesn't have full wordsize we have
 930              to make sure that for big-endian machines the higher order
 931              bits are used.  */
 932           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 933             value_word = simplify_expand_binop (word_mode, lshr_optab,
 934                                                 value_word,
 935                                                 GEN_INT (BITS_PER_WORD
 936                                                          - new_bitsize),
 937                                                 NULL_RTX, true,
 938                                                 OPTAB_LIB_WIDEN);
 939
 940           if (!store_bit_field_1 (op0, new_bitsize,
 941                                   bitnum + bit_offset,
 942                                   bitregion_start, bitregion_end,
 943                                   word_mode,
 944                                   value_word, reverse, fallback_p))
 945             {
 946               delete_insns_since (last);
 947               return false;
 948             }
 949         }
 950       return true;
 951     }
 952
 953   /* If VALUE has a floating-point or complex mode, access it as an
 954      integer of the corresponding size.  This can occur on a machine
 955      with 64 bit registers that uses SFmode for float.  It can also
 956      occur for unaligned float or complex fields.  */
 957   orig_value = value;
 958   if (GET_MODE (value) != VOIDmode
 959       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 960       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 961     {
 962       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 963       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 964     }
 965
 966   /* If OP0 is a multi-word register, narrow it to the affected word.
 967      If the region spans two words, defer to store_split_bit_field.  */
 968   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 969     {
 970       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 971                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 972       gcc_assert (op0);
 973       bitnum %= BITS_PER_WORD;
 974       if (bitnum + bitsize > BITS_PER_WORD)
 975         {
 976           if (!fallback_p)
 977             return false;
 978
 979           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 980                                  bitregion_end, value, reverse);
 981           return true;
 982         }
 983     }
 984
 985   /* From here on we can assume that the field to be stored in fits
 986      within a word.  If the destination is a register, it too fits
 987      in a word.  */
 988
 989   extraction_insn insv;
 990   if (!MEM_P (op0)
 991       && !reverse
 992       && get_best_reg_extraction_insn (&insv, EP_insv,
 993                                        GET_MODE_BITSIZE (GET_MODE (op0)),
 994                                        fieldmode)
 995       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 996     return true;
 997
 998   /* If OP0 is a memory, try copying it to a register and seeing if a
 999      cheap register alternative is available.  */
1000   if (MEM_P (op0) && !reverse)
1001     {
1002       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1003                                         fieldmode)
1004           && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1005         return true;
1006
1007       rtx_insn *last = get_last_insn ();
1008
1009       /* Try loading part of OP0 into a register, inserting the bitfield
1010          into that, and then copying the result back to OP0.  */
1011       unsigned HOST_WIDE_INT bitpos;
1012       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1013                                                bitregion_start, bitregion_end,
1014                                                fieldmode, &bitpos);
1015       if (xop0)
1016         {
1017           rtx tempreg = copy_to_reg (xop0);
1018           if (store_bit_field_1 (tempreg, bitsize, bitpos,
1019                                  bitregion_start, bitregion_end,
1020                                  fieldmode, orig_value, reverse, false))
1021             {
1022               emit_move_insn (xop0, tempreg);
1023               return true;
1024             }
1025           delete_insns_since (last);
1026         }
1027     }
1028
1029   if (!fallback_p)
1030     return false;
1031
1032   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
1033                          bitregion_end, value, reverse);
1034   return true;
1035 }
1036
1037 /* Emit RTL that stores VALUE into the bit-field of structure STR_RTX
1038    that is BITSIZE bits wide and starts at bit BITNUM.
1039
1040    BITREGION_START is the bitpos of the first bitfield in the region
1041    that holds the field; BITREGION_END is the bitpos of the ending
1042    bitfield in that region.  Both are 0 if the C++ memory model does
1043    not apply, or if bitfield regions are not being tracked.
1044
1045    FIELDMODE is the machine mode of the FIELD_DECL node for the field.
1046
1047    If REVERSE is true, the store is to be done in reverse order.  */
1048
1049 void
1050 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1051                  unsigned HOST_WIDE_INT bitnum,
1052                  unsigned HOST_WIDE_INT bitregion_start,
1053                  unsigned HOST_WIDE_INT bitregion_end,
1054                  machine_mode fieldmode,
1055                  rtx value, bool reverse)
1056 {
1057   /* Cases where -fstrict-volatile-bitfields applies end here.  */
1058   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
1059                                   bitregion_start, bitregion_end))
1060     {
1061       if (bitsize != GET_MODE_BITSIZE (fieldmode))
1062         {
1063           /* The field does not fill a FIELDMODE object: insert it into
1064              a register copy of that object and write the copy back.  */
1065           rtx container = narrow_bit_field_mem (str_rtx, fieldmode, bitsize,
1066                                                 bitnum, &bitnum);
1067           gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
1068           rtx scratch = copy_to_reg (container);
1069           if (!store_bit_field_1 (scratch, bitsize, bitnum, 0, 0,
1070                                   fieldmode, value, reverse, true))
1071             gcc_unreachable ();
1072
1073           emit_move_insn (container, scratch);
1074         }
1075       else
1076         {
1077           /* The field is exactly one FIELDMODE object, so a simple store
1078              is enough.  We know here that the field can be accessed
1079              with one single instruction.  For targets that support
1080              unaligned memory, an unaligned access may be necessary.  */
1081           rtx dest = adjust_bitfield_address (str_rtx, fieldmode,
1082                                               bitnum / BITS_PER_UNIT);
1083           if (reverse)
1084             value = flip_storage_order (fieldmode, value);
1085           gcc_assert (bitnum % BITS_PER_UNIT == 0);
1086           emit_move_insn (dest, value);
1087         }
1088
1089       return;
1090     }
1091
1092   /* Under the C++0x memory model, we must not touch bits outside the
1093      bit region.  For a memory destination whose region does not start
1094      at bit 0, move the address to the start of the region and rebase
1095      BITNUM and the region bounds to match.  */
1096   if (MEM_P (str_rtx) && bitregion_start > 0)
1097     {
1098       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1099
1100       HOST_WIDE_INT region_byte = bitregion_start / BITS_PER_UNIT;
1101       bitnum -= bitregion_start;
1102       bitregion_end -= bitregion_start;
1103       bitregion_start = 0;
1104
1105       HOST_WIDE_INT nbytes
1106         = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1107       machine_mode region_mode
1108         = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1109                          MEM_ALIGN (str_rtx), VOIDmode,
1110                          MEM_VOLATILE_P (str_rtx));
1111       str_rtx = adjust_bitfield_address_size (str_rtx, region_mode,
1112                                               region_byte, nbytes);
1113     }
1114
1115   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1116                           bitregion_start, bitregion_end,
1117                           fieldmode, value, reverse, true))
1118     gcc_unreachable ();
1119 }
1120 \f
1121 /* Store VALUE into the bit field of OP0 that is BITSIZE bits wide and
1122    starts at bit BITNUM, using shifts and boolean operations.
1123
1124    If REVERSE is true, the store is to be done in reverse order.  */
1125
1126 static void
1127 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1128                        unsigned HOST_WIDE_INT bitnum,
1129                        unsigned HOST_WIDE_INT bitregion_start,
1130                        unsigned HOST_WIDE_INT bitregion_end,
1131                        rtx value, bool reverse)
1132 {
1133   /* Not handled here: a structure with a known alignment of just a
1134      halfword and a field split across two of its aligned halfwords, or
1135      one with a known alignment of just a byte and a field split across
1136      two bytes.  Such cases are not supposed to be able to occur.  */
1137   if (MEM_P (op0))
1138     {
1139       /* Offer get_best_mode OP0's own mode, or word_mode when OP0's mode
1140          has a bitsize of zero or is wider than word_mode.  */
1141       machine_mode own_mode = GET_MODE (op0);
1142       bool own_mode_ok = (GET_MODE_BITSIZE (own_mode) != 0
1143                           && (GET_MODE_BITSIZE (own_mode)
1144                               <= GET_MODE_BITSIZE (word_mode)));
1145       machine_mode best_mode
1146         = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1147                          MEM_ALIGN (op0), own_mode_ok ? own_mode : word_mode,
1148                          MEM_VOLATILE_P (op0));
1149       if (best_mode != VOIDmode)
1150         op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1151       else
1152         {
1153           /* The only way this should occur is if the field spans word
1154              boundaries.  */
1155           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1156                                  bitregion_end, value, reverse);
1157           return;
1158         }
1159     }
1160
1161   store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
1162 }
1163
1164 /* Helper function for store_fixed_bit_field.  Store VALUE into the
1165    BITSIZE-bit field of OP0 that starts at bit BITNUM, always using
1166    the mode of OP0.  If REVERSE is true, the store is to be done in
1167    reverse order.  */
1168
1169 static void
1170 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1171                          unsigned HOST_WIDE_INT bitnum,
1172                          rtx value, bool reverse)
1173 {
1174   machine_mode mode = GET_MODE (op0);
1175   bool value_is_zero = false;
1176   bool value_is_ones = false;
1177
1178   gcc_assert (SCALAR_INT_MODE_P (mode));
1179
1180   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1181      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1182
1183   /* BITNUM may so far be the distance between the field's msb and that
1184      of the containing datum.  If it is, convert it, so that from here
1185      on BITNUM is always the distance between the field's lsb and that
1186      of OP0.  */
1187   if (BYTES_BIG_ENDIAN ? !reverse : reverse)
1188     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1189
1190   if (!CONST_INT_P (value))
1191     {
1192       /* VALUE is not constant.  Decide from its original mode whether
1193          it has to be masked down to BITSIZE bits, then convert it to
1194          MODE, apply the mask if required and shift it left by BITNUM
1195          bits.  */
1196       bool needs_mask = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1197                          && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1198
1199       if (GET_MODE (value) != mode)
1200         value = convert_to_mode (mode, value, 1);
1201
1202       if (needs_mask)
1203         {
1204           rtx value_mask = mask_rtx (mode, 0, bitsize, 0);
1205           value = expand_binop (mode, and_optab, value, value_mask,
1206                                 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1207         }
1208
1209       if (bitnum != 0)
1210         value = expand_shift (LSHIFT_EXPR, mode, value, bitnum, NULL_RTX, 1);
1211     }
1212   else
1213     {
1214       /* VALUE is constant.  Reduce it to BITSIZE bits, note whether all
1215          of those bits are zeros or ones, and shift the constant.  */
1216       unsigned HOST_WIDE_INT v = UINTVAL (value);
1217
1218       if (bitsize < HOST_BITS_PER_WIDE_INT)
1219         v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;
1220
1221       value_is_zero = (v == 0);
1222       if (!value_is_zero)
1223         value_is_ones
1224           = ((bitsize < HOST_BITS_PER_WIDE_INT
1225               && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
1226              || (bitsize == HOST_BITS_PER_WIDE_INT
1227                  && v == (unsigned HOST_WIDE_INT) -1));
1228
1229       value = lshift_value (mode, v, bitnum);
1230     }
1231
1232   if (reverse)
1233     value = flip_storage_order (mode, value);
1234
1235   /* Get OP0 into a register.  We keep the intermediates in registers
1236      to allow CSE to combine consecutive bitfield assignments.  */
1237   rtx result = force_reg (mode, op0);
1238
1239   /* Clear the chosen bits, except that we need not bother if VALUE
1240      sets every one of them.  */
1241   if (!value_is_ones)
1242     {
1243       rtx clear_mask = mask_rtx (mode, bitnum, bitsize, 1);
1244       if (reverse)
1245         clear_mask = flip_storage_order (mode, clear_mask);
1246       rtx cleared = expand_binop (mode, and_optab, result, clear_mask,
1247                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
1248       result = force_reg (mode, cleared);
1249     }
1250
1251   /* Logical-or VALUE into the result, unless VALUE is zero.  */
1252   if (!value_is_zero)
1253     {
1254       rtx merged = expand_binop (mode, ior_optab, result, value,
1255                                  NULL_RTX, 1, OPTAB_LIB_WIDEN);
1256       result = force_reg (mode, merged);
1257     }
1258
1259   /* Store the result back into (a copy of the rtx of) OP0, unless
1260      the result is OP0 itself.  */
1261   if (op0 != result)
1262     emit_move_insn (copy_rtx (op0), result);
1263 }
1264 \f
1265 /* Store a bit field that is split across multiple accessible memory objects.
1266
1267    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1268    BITSIZE is the field width; BITPOS the position of its first bit
1269    (within the word).
1270    VALUE is the value to store.
        BITREGION_START and BITREGION_END are passed on unchanged to
        store_fixed_bit_field.  In addition, a nonzero BITREGION_END makes
        the loop below halve its access unit near the end of the region
        when OP0 is not a REG or a SUBREG of a REG.
1271
1272    If REVERSE is true, the store is to be done in reverse order.
1273
        The field is stored piece by piece.  Each piece is taken from VALUE
        and handed to store_fixed_bit_field; no piece is wider than
        BITS_PER_WORD bits or crosses a boundary between access units.

1274    This does not yet handle fields wider than BITS_PER_WORD.  */
1275
1276 static void
1277 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1278                        unsigned HOST_WIDE_INT bitpos,
1279                        unsigned HOST_WIDE_INT bitregion_start,
1280                        unsigned HOST_WIDE_INT bitregion_end,
1281                        rtx value, bool reverse)
1282 {
       /* UNIT is the size in bits of the pieces of OP0 accessed at a time,
          TOTAL_BITS the bitsize of VALUE's mode and BITSDONE the number of
          bits of the field stored so far.  */
1283   unsigned int unit, total_bits, bitsdone = 0;
1284
1285   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1286      much at a time.  */
1287   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1288     unit = BITS_PER_WORD;
1289   else
1290     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1291
1292   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1293      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1294      again, and we will mutually recurse forever.  */
1295   if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1296     unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1297
1298   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1299      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1300      that VALUE might be a floating-point constant.  */
1301   if (CONSTANT_P (value) && !CONST_INT_P (value))
1302     {
1303       rtx word = gen_lowpart_common (word_mode, value);
1304
1305       if (word && (value != word))
1306         value = word;
1307       else
1308         value = gen_lowpart_common (word_mode,
1309                                     force_reg (GET_MODE (value) != VOIDmode
1310                                                ? GET_MODE (value)
1311                                                : word_mode, value));
1312     }
1313
1314   total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1315
       /* Each iteration stores the next THISSIZE bits of the field, or only
          shrinks UNIT and retries.  */
1316   while (bitsdone < bitsize)
1317     {
1318       unsigned HOST_WIDE_INT thissize;
1319       unsigned HOST_WIDE_INT thispos;
1320       unsigned HOST_WIDE_INT offset;
1321       rtx part, word;
1322
           /* OFFSET is the index of the UNIT-sized chunk that holds the next
              bit to store; THISPOS is that bit's position within the chunk.  */
1323       offset = (bitpos + bitsdone) / unit;
1324       thispos = (bitpos + bitsdone) % unit;
1325
1326       /* When region of bytes we can touch is restricted, decrease
1327          UNIT close to the end of the region as needed.  If op0 is a REG
1328          or SUBREG of REG, don't do this, as there can't be data races
1329          on a register and we can expand shorter code in some cases.  */
1330       if (bitregion_end
1331           && unit > BITS_PER_UNIT
1332           && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1333           && !REG_P (op0)
1334           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1335         {
               /* Nothing has been stored in this iteration yet; start it
                  again with the smaller UNIT.  */
1336           unit = unit / 2;
1337           continue;
1338         }
1339
1340       /* THISSIZE must not overrun a word boundary.  Otherwise,
1341          store_fixed_bit_field will call us again, and we will mutually
1342          recurse forever.  */
1343       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1344       thissize = MIN (thissize, unit - thispos);
1345
           /* Get the next THISSIZE bits of VALUE into PART.  Which end of
              VALUE they are taken from depends on BYTES_BIG_ENDIAN, with the
              choice inverted when REVERSE is true.  */
1346       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1347         {
1348           /* Fetch successively less significant portions.  */
1349           if (CONST_INT_P (value))
1350             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1351                              >> (bitsize - bitsdone - thissize))
1352                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1353           /* Likewise, but the source is little-endian.  */
1354           else if (reverse)
1355             part = extract_fixed_bit_field (word_mode, value, thissize,
1356                                             bitsize - bitsdone - thissize,
1357                                             NULL_RTX, 1, false);
1358           else
1359             {
                   /* NOTE(review): this declaration shadows the function-level
                      TOTAL_BITS, which was set from the same expression before
                      the loop; VALUE is not reassigned inside the loop.  */
1360               int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1361               /* The args are chosen so that the last part includes the
1362                  lsb.  Give extract_bit_field the value it needs (with
1363                  endianness compensation) to fetch the piece we want.  */
1364               part = extract_fixed_bit_field (word_mode, value, thissize,
1365                                               total_bits - bitsize + bitsdone,
1366                                               NULL_RTX, 1, false);
1367             }
1368         }
1369       else
1370         {
1371           /* Fetch successively more significant portions.  */
1372           if (CONST_INT_P (value))
1373             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1374                              >> bitsdone)
1375                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1376           /* Likewise, but the source is big-endian.  */
1377           else if (reverse)
1378             part = extract_fixed_bit_field (word_mode, value, thissize,
1379                                             total_bits - bitsdone - thissize,
1380                                             NULL_RTX, 1, false);
1381           else
1382             part = extract_fixed_bit_field (word_mode, value, thissize,
1383                                             bitsdone, NULL_RTX, 1, false);
1384         }
1385
1386       /* If OP0 is a register, then handle OFFSET here.
1387
1388          When handling multiword bitfields, extract_bit_field may pass
1389          down a word_mode SUBREG of a larger REG for a bitfield that actually
1390          crosses a word boundary.  Thus, for a SUBREG, we must find
1391          the current word starting from the base register.  */
1392       if (GET_CODE (op0) == SUBREG)
1393         {
1394           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1395                             + (offset * unit / BITS_PER_WORD);
1396           machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
               /* If the inner register is smaller than a word, only word 0
                  exists; const0_rtx marks any other word as out of bounds
                  (it is tested for below).  */
1397           if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1398             word = word_offset ? const0_rtx : op0;
1399           else
1400             word = operand_subword_force (SUBREG_REG (op0), word_offset,
1401                                           GET_MODE (SUBREG_REG (op0)));
               /* The word has been selected, so keep only the part of OFFSET
                  that counts units within a word.  NOTE(review): the masking
                  assumes BITS_PER_WORD / UNIT is a power of two -- confirm.  */
1402           offset &= BITS_PER_WORD / unit - 1;
1403         }
1404       else if (REG_P (op0))
1405         {
1406           machine_mode op0_mode = GET_MODE (op0);
               /* As for a SUBREG above, but counting words from OP0 itself.  */
1407           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1408             word = offset ? const0_rtx : op0;
1409           else
1410             word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1411                                           GET_MODE (op0));
1412           offset &= BITS_PER_WORD / unit - 1;
1413         }
1414       else
1415         word = op0;
1416
1417       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1418          it is just an out-of-bounds access.  Ignore it.  */
1419       if (word != const0_rtx)
1420         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1421                                bitregion_start, bitregion_end, part,
1422                                reverse);
           /* The piece counts as done even when the store was skipped.  */
1423       bitsdone += thissize;
1424     }
1425 }
1426 \f
1427 /* A subroutine of extract_bit_field_1.  Return X, the value produced
1428    by an extraction, converted to either MODE or TMODE.  MODE, TMODE
1429    and UNSIGNEDP are arguments to extract_bit_field.  */
1430
1431 static rtx
1432 convert_extracted_bit_field (rtx x, machine_mode mode,
1433                              machine_mode tmode, bool unsignedp)
1434 {
1435   const machine_mode from_mode = GET_MODE (x);
1436
1437   /* Nothing to do if X already has one of the two wanted modes.  */
1438   if (from_mode == tmode || from_mode == mode)
1439     return x;
1440
1441   if (SCALAR_INT_MODE_P (tmode))
1442     return convert_to_mode (tmode, x, unsignedp);
1443
1444   /* TMODE is not a scalar integer mode.  Convert X to the integer mode
1445      that has TMODE's size, force the result into a register, and then
1446      access that register in TMODE.  */
1447   machine_mode int_mode
1448     = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1449   rtx int_val = convert_to_mode (int_mode, x, unsignedp);
1450   int_val = force_reg (int_mode, int_val);
1451   return gen_lowpart (tmode, int_val);
1452 }
1453
1454 /* Try to use an ext(z)v pattern to extract a field from OP0.
1455    Return the extracted value on success, otherwise return null.
1456    EXTV describes the pattern to use; its field_mode is the mode of
1457    the extraction, called EXT_MODE below.  The other arguments are as
        for extract_bit_field.

        Null is returned without trying the pattern if BITSIZE is zero or
        exceeds the bitsize of EXT_MODE, or if OP0 is a SUBREG whose mode
        is not EXT_MODE.  */
1458
1459 static rtx
1460 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1461                               unsigned HOST_WIDE_INT bitsize,
1462                               unsigned HOST_WIDE_INT bitnum,
1463                               int unsignedp, rtx target,
1464                               machine_mode mode, machine_mode tmode)
1465 {
1466   struct expand_operand ops[4];
       /* SPEC_TARGET is the TARGET passed in, or the TMODE register created
          below when TARGET is null.  SPEC_TARGET_SUBREG, when set, is the
          EXT_MODE lowpart of SPEC_TARGET that serves as the output operand.
          Both are compared against the expanded output at the end.  */
1467   rtx spec_target = target;
1468   rtx spec_target_subreg = 0;
1469   machine_mode ext_mode = extv->field_mode;
1470   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1471
       /* The pattern cannot be used for an empty field or for one that is
          wider than EXT_MODE.  */
1472   if (bitsize == 0 || unit < bitsize)
1473     return NULL_RTX;
1474
1475   if (MEM_P (op0))
1476     /* Get a reference to the first byte of the field.  */
1477     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1478                                 &bitnum);
1479   else
1480     {
1481       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1482       if (BYTES_BIG_ENDIAN)
1483         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1484
1485       /* If op0 is a register, we need it in EXT_MODE to make it
1486          acceptable to the format of ext(z)v.  */
1487       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1488         return NULL_RTX;
1489       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1490         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1491     }
1492
1493   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1494      "backwards" from the size of the unit we are extracting from.
1495      Otherwise, we count bits from the most significant on a
1496      BYTES/BITS_BIG_ENDIAN machine.  */
1497
1498   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1499     bitnum = unit - bitsize - bitnum;
1500
       /* With no target supplied, extract into a new TMODE register.  */
1501   if (target == 0)
1502     target = spec_target = gen_reg_rtx (tmode);
1503
1504   if (GET_MODE (target) != ext_mode)
1505     {
1506       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1507          between the mode of the extraction (EXT_MODE) and the target
1508          mode.  Instead, extract into a temporary EXT_MODE register; the
1509          result is converted by convert_extracted_bit_field below.  */
1510       if (REG_P (target)
1511           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1512         {
1513           target = gen_lowpart (ext_mode, target);
               /* Remember the lowpart only when EXT_MODE has more precision
                  than the mode of SPEC_TARGET.  */
1514           if (GET_MODE_PRECISION (ext_mode)
1515               > GET_MODE_PRECISION (GET_MODE (spec_target)))
1516             spec_target_subreg = target;
1517         }
1518       else
1519         target = gen_reg_rtx (ext_mode);
1520     }
1521
       /* Operands of the pattern, in order: the output, the object to
          extract from, the field width and the field position.  */
1522   create_output_operand (&ops[0], target, ext_mode);
1523   create_fixed_operand (&ops[1], op0);
1524   create_integer_operand (&ops[2], bitsize);
1525   create_integer_operand (&ops[3], bitnum);
1526   if (maybe_expand_insn (extv->icode, 4, ops))
1527     {
           /* Take the result from ops[0].value.  If that is SPEC_TARGET the
              result is already where it was wanted; if it is the lowpart
              recorded in SPEC_TARGET_SUBREG, SPEC_TARGET is returned;
              anything else is converted to MODE or TMODE.  */
1528       target = ops[0].value;
1529       if (target == spec_target)
1530         return target;
1531       if (target == spec_target_subreg)
1532         return spec_target;
1533       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1534     }
1535   return NULL_RTX;
1536 }
1537
1538 /* A subroutine of extract_bit_field, with the same arguments.
1539    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1540    if we can find no other means of implementing the operation.
1541    if FALLBACK_P is false, return NULL instead.  */
1542
1543 static rtx
1544 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1545                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1546                      machine_mode mode, machine_mode tmode,
1547                      bool reverse, bool fallback_p)
1548 {
1549   rtx op0 = str_rtx;
1550   machine_mode int_mode;
1551   machine_mode mode1;
1552
1553   if (tmode == VOIDmode)
1554     tmode = mode;
1555
1556   while (GET_CODE (op0) == SUBREG)
1557     {
1558       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1559       op0 = SUBREG_REG (op0);
1560     }
1561
1562   /* If we have an out-of-bounds access to a register, just return an
1563      uninitialized register of the required mode.  This can occur if the
1564      source code contains an out-of-bounds access to a small array.  */
1565   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1566     return gen_reg_rtx (tmode);
1567
1568   if (REG_P (op0)
1569       && mode == GET_MODE (op0)
1570       && bitnum == 0
1571       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1572     {
1573       if (reverse)
1574         op0 = flip_storage_order (mode, op0);
1575       /* We're trying to extract a full register from itself.  */
1576       return op0;
1577     }
1578
1579   /* See if we can get a better vector mode before extracting.  */
1580   if (VECTOR_MODE_P (GET_MODE (op0))
1581       && !MEM_P (op0)
1582       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1583     {
1584       machine_mode new_mode;
1585
1586       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1587         new_mode = MIN_MODE_VECTOR_FLOAT;
1588       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1589         new_mode = MIN_MODE_VECTOR_FRACT;
1590       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1591         new_mode = MIN_MODE_VECTOR_UFRACT;
1592       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1593         new_mode = MIN_MODE_VECTOR_ACCUM;
1594       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1595         new_mode = MIN_MODE_VECTOR_UACCUM;
1596       else
1597         new_mode = MIN_MODE_VECTOR_INT;
1598
1599       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1600         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1601             && targetm.vector_mode_supported_p (new_mode))
1602           break;
1603       if (new_mode != VOIDmode)
1604         op0 = gen_lowpart (new_mode, op0);
1605     }
1606
1607   /* Use vec_extract patterns for extracting parts of vectors whenever
1608      available.  */
1609   if (VECTOR_MODE_P (GET_MODE (op0))
1610       && !MEM_P (op0)
1611       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1612       && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1613           == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1614     {
1615       struct expand_operand ops[3];
1616       machine_mode outermode = GET_MODE (op0);
1617       machine_mode innermode = GET_MODE_INNER (outermode);
1618       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1619       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1620
1621       create_output_operand (&ops[0], target, innermode);
1622       create_input_operand (&ops[1], op0, outermode);
1623       create_integer_operand (&ops[2], pos);
1624       if (maybe_expand_insn (icode, 3, ops))
1625         {
1626           target = ops[0].value;
1627           if (GET_MODE (target) != mode)
1628             return gen_lowpart (tmode, target);
1629           return target;
1630         }
1631     }
1632
1633   /* Make sure we are playing with integral modes.  Pun with subregs
1634      if we aren't.  */
1635   {
1636     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1637     if (imode != GET_MODE (op0))
1638       {
1639         if (MEM_P (op0))
1640           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1641         else if (imode != BLKmode)
1642           {
1643             op0 = gen_lowpart (imode, op0);
1644
1645             /* If we got a SUBREG, force it into a register since we
1646                aren't going to be able to do another SUBREG on it.  */
1647             if (GET_CODE (op0) == SUBREG)
1648               op0 = force_reg (imode, op0);
1649           }
1650         else
1651           {
1652             HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1653             rtx mem = assign_stack_temp (GET_MODE (op0), size);
1654             emit_move_insn (mem, op0);
1655             op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1656           }
1657       }
1658   }
1659
1660   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1661      If that's wrong, the solution is to test for it and set TARGET to 0
1662      if needed.  */
1663
1664   /* Get the mode of the field to use for atomic access or subreg
1665      conversion.  */
1666   mode1 = mode;
1667   if (SCALAR_INT_MODE_P (tmode))
1668     {
1669       machine_mode try_mode = mode_for_size (bitsize,
1670                                                   GET_MODE_CLASS (tmode), 0);
1671       if (try_mode != BLKmode)
1672         mode1 = try_mode;
1673     }
1674   gcc_assert (mode1 != BLKmode);
1675
1676   /* Extraction of a full MODE1 value can be done with a subreg as long
1677      as the least significant bit of the value is the least significant
1678      bit of either OP0 or a word of OP0.  */
1679   if (!MEM_P (op0)
1680       && !reverse
1681       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1682       && bitsize == GET_MODE_BITSIZE (mode1)
1683       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1684     {
1685       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1686                                      bitnum / BITS_PER_UNIT);
1687       if (sub)
1688         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1689     }
1690
1691   /* Extraction of a full MODE1 value can be done with a load as long as
1692      the field is on a byte boundary and is sufficiently aligned.  */
1693   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1694     {
1695       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1696       if (reverse)
1697         op0 = flip_storage_order (mode1, op0);
1698       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1699     }
1700
1701   /* Handle fields bigger than a word.  */
1702
1703   if (bitsize > BITS_PER_WORD)
1704     {
1705       /* Here we transfer the words of the field
1706          in the order least significant first.
1707          This is because the most significant word is the one which may
1708          be less than full.  */
1709
1710       const bool backwards = WORDS_BIG_ENDIAN;
1711       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1712       unsigned int i;
1713       rtx_insn *last;
1714
1715       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1716         target = gen_reg_rtx (mode);
1717
1718       /* In case we're about to clobber a base register or something
1719          (see gcc.c-torture/execute/20040625-1.c).   */
1720       if (reg_mentioned_p (target, str_rtx))
1721         target = gen_reg_rtx (mode);
1722
1723       /* Indicate for flow that the entire target reg is being set.  */
1724       emit_clobber (target);
1725
1726       last = get_last_insn ();
1727       for (i = 0; i < nwords; i++)
1728         {
1729           /* If I is 0, use the low-order word in both field and target;
1730              if I is 1, use the next to lowest word; and so on.  */
1731           /* Word number in TARGET to use.  */
1732           unsigned int wordnum
1733             = (backwards
1734                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1735                : i);
1736           /* Offset from start of field in OP0.  */
1737           unsigned int bit_offset = (backwards ^ reverse
1738                                      ? MAX ((int) bitsize - ((int) i + 1)
1739                                             * BITS_PER_WORD,
1740                                             0)
1741                                      : (int) i * BITS_PER_WORD);
1742           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1743           rtx result_part
1744             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1745                                              bitsize - i * BITS_PER_WORD),
1746                                    bitnum + bit_offset, 1, target_part,
1747                                    mode, word_mode, reverse, fallback_p);
1748
1749           gcc_assert (target_part);
1750           if (!result_part)
1751             {
1752               delete_insns_since (last);
1753               return NULL;
1754             }
1755
1756           if (result_part != target_part)
1757             emit_move_insn (target_part, result_part);
1758         }
1759
1760       if (unsignedp)
1761         {
1762           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1763              need to be zero'd out.  */
1764           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1765             {
1766               unsigned int i, total_words;
1767
1768               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1769               for (i = nwords; i < total_words; i++)
1770                 emit_move_insn
1771                   (operand_subword (target,
1772                                     backwards ? total_words - i - 1 : i,
1773                                     1, VOIDmode),
1774                    const0_rtx);
1775             }
1776           return target;
1777         }
1778
1779       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1780       target = expand_shift (LSHIFT_EXPR, mode, target,
1781                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1782       return expand_shift (RSHIFT_EXPR, mode, target,
1783                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1784     }
1785
1786   /* If OP0 is a multi-word register, narrow it to the affected word.
1787      If the region spans two words, defer to extract_split_bit_field.  */
1788   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1789     {
1790       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1791                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1792       bitnum %= BITS_PER_WORD;
1793       if (bitnum + bitsize > BITS_PER_WORD)
1794         {
1795           if (!fallback_p)
1796             return NULL_RTX;
1797           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1798                                             reverse);
1799           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1800         }
1801     }
1802
1803   /* From here on we know the desired field is smaller than a word.
1804      If OP0 is a register, it too fits within a word.  */
1805   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1806   extraction_insn extv;
1807   if (!MEM_P (op0)
1808       && !reverse
1809       /* ??? We could limit the structure size to the part of OP0 that
1810          contains the field, with appropriate checks for endianness
1811          and TRULY_NOOP_TRUNCATION.  */
1812       && get_best_reg_extraction_insn (&extv, pattern,
1813                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1814                                        tmode))
1815     {
1816       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1817                                                  unsignedp, target, mode,
1818                                                  tmode);
1819       if (result)
1820         return result;
1821     }
1822
1823   /* If OP0 is a memory, try copying it to a register and seeing if a
1824      cheap register alternative is available.  */
1825   if (MEM_P (op0) & !reverse)
1826     {
1827       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1828                                         tmode))
1829         {
1830           rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1831                                                      bitnum, unsignedp,
1832                                                      target, mode,
1833                                                      tmode);
1834           if (result)
1835             return result;
1836         }
1837
1838       rtx_insn *last = get_last_insn ();
1839
1840       /* Try loading part of OP0 into a register and extracting the
1841          bitfield from that.  */
1842       unsigned HOST_WIDE_INT bitpos;
1843       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1844                                                0, 0, tmode, &bitpos);
1845       if (xop0)
1846         {
1847           xop0 = copy_to_reg (xop0);
1848           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1849                                             unsignedp, target,
1850                                             mode, tmode, reverse, false);
1851           if (result)
1852             return result;
1853           delete_insns_since (last);
1854         }
1855     }
1856
1857   if (!fallback_p)
1858     return NULL;
1859
1860   /* Find a correspondingly-sized integer field, so we can apply
1861      shifts and masks to it.  */
1862   int_mode = int_mode_for_mode (tmode);
1863   if (int_mode == BLKmode)
1864     int_mode = int_mode_for_mode (mode);
1865   /* Should probably push op0 out to memory and then do a load.  */
1866   gcc_assert (int_mode != BLKmode);
1867
1868   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target,
1869                                     unsignedp, reverse);
1870
1871   /* Complex values must be reversed piecewise, so we need to undo the global
1872      reversal, convert to the complex mode and reverse again.  */
1873   if (reverse && COMPLEX_MODE_P (tmode))
1874     {
1875       target = flip_storage_order (int_mode, target);
1876       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1877       target = flip_storage_order (tmode, target);
1878     }
1879   else
1880     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1881
1882   return target;
1883 }
1884
1885 /* Generate code to extract a byte-field from STR_RTX
1886    containing BITSIZE bits, starting at BITNUM,
1887    and put it in TARGET if possible (if TARGET is nonzero).
1888    Regardless of TARGET, we return the rtx for where the value is placed.
1889
1890    STR_RTX is the structure containing the byte (a REG or MEM).
1891    UNSIGNEDP is nonzero if this is an unsigned bit field.
1892    MODE is the natural mode of the field value once extracted.
1893    TMODE is the mode the caller would like the value to have;
1894    but the value may be returned with type MODE instead.
1895
1896    If REVERSE is true, the extraction is to be done in reverse order.
1897
1898    If a TARGET is specified and we can store in it at no extra cost,
1899    we do so, and return TARGET.
1900    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1901    if they are equally easy.  */
1902
1903 rtx
1904 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1905                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1906                    machine_mode mode, machine_mode tmode, bool reverse)
1907 {
1908   machine_mode mode1;
1909
1910   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1911   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1912     mode1 = GET_MODE (str_rtx);
1913   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1914     mode1 = GET_MODE (target);
1915   else
1916     mode1 = tmode;
1917
1918   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1919     {
1920       /* Extraction of a full MODE1 value can be done with a simple load.
1921          We know here that the field can be accessed with one single
1922          instruction.  For targets that support unaligned memory,
1923          an unaligned access may be necessary.  */
1924       if (bitsize == GET_MODE_BITSIZE (mode1))
1925         {
1926           rtx result = adjust_bitfield_address (str_rtx, mode1,
1927                                                 bitnum / BITS_PER_UNIT);
1928           if (reverse)
1929             result = flip_storage_order (mode1, result);
1930           gcc_assert (bitnum % BITS_PER_UNIT == 0);
1931           return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1932         }
1933
1934       str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1935                                       &bitnum);
1936       gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1937       str_rtx = copy_to_reg (str_rtx);
1938     }
1939
1940   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1941                               target, mode, tmode, reverse, true);
1942 }
1943 \f
1944 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1945    from bit BITNUM of OP0.
1946
1947    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1948    If REVERSE is true, the extraction is to be done in reverse order.
1949
1950    If TARGET is nonzero, attempts to store the value there
1951    and return TARGET, but this is not guaranteed.
1952    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1953
1954 static rtx
1955 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1956                          unsigned HOST_WIDE_INT bitsize,
1957                          unsigned HOST_WIDE_INT bitnum, rtx target,
1958                          int unsignedp, bool reverse)
1959 {
1960   if (MEM_P (op0))
1961     {
1962       machine_mode mode
1963         = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1964                          MEM_VOLATILE_P (op0));
1965
1966       if (mode == VOIDmode)
1967         /* The only way this should occur is if the field spans word
1968            boundaries.  */
1969         return extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1970                                         reverse);
1971
1972       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1973     }
1974
1975   return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1976                                     target, unsignedp, reverse);
1977 }
1978
1979 /* Helper function for extract_fixed_bit_field, extracts
1980    the bit field always using the MODE of OP0.  */
1981
1982 static rtx
1983 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1984                            unsigned HOST_WIDE_INT bitsize,
1985                            unsigned HOST_WIDE_INT bitnum, rtx target,
1986                            int unsignedp, bool reverse)
1987 {
1988   machine_mode mode = GET_MODE (op0);
1989   gcc_assert (SCALAR_INT_MODE_P (mode));
1990
1991   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1992      for invalid input, such as extract equivalent of f5 from
1993      gcc.dg/pr48335-2.c.  */
1994
1995   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1996     /* BITNUM is the distance between our msb and that of OP0.
1997        Convert it to the distance from the lsb.  */
1998     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1999
2000   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2001      We have reduced the big-endian case to the little-endian case.  */
2002   if (reverse)
2003     op0 = flip_storage_order (mode, op0);
2004
2005   if (unsignedp)
2006     {
2007       if (bitnum)
2008         {
2009           /* If the field does not already start at the lsb,
2010              shift it so it does.  */
2011           /* Maybe propagate the target for the shift.  */
2012           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2013           if (tmode != mode)
2014             subtarget = 0;
2015           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2016         }
2017       /* Convert the value to the desired mode.  */
2018       if (mode != tmode)
2019         op0 = convert_to_mode (tmode, op0, 1);
2020
2021       /* Unless the msb of the field used to be the msb when we shifted,
2022          mask out the upper bits.  */
2023
2024       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2025         return expand_binop (GET_MODE (op0), and_optab, op0,
2026                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
2027                              target, 1, OPTAB_LIB_WIDEN);
2028       return op0;
2029     }
2030
2031   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2032      then arithmetic-shift its lsb to the lsb of the word.  */
2033   op0 = force_reg (mode, op0);
2034
2035   /* Find the narrowest integer mode that contains the field.  */
2036
2037   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
2038        mode = GET_MODE_WIDER_MODE (mode))
2039     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
2040       {
2041         op0 = convert_to_mode (mode, op0, 0);
2042         break;
2043       }
2044
2045   if (mode != tmode)
2046     target = 0;
2047
2048   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2049     {
2050       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2051       /* Maybe propagate the target for the shift.  */
2052       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2053       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2054     }
2055
2056   return expand_shift (RSHIFT_EXPR, mode, op0,
2057                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2058 }
2059
2060 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2061    VALUE << BITPOS.  */
2062
2063 static rtx
2064 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2065               int bitpos)
2066 {
2067   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2068 }
2069 \f
2070 /* Extract a bit field that is split across two words
2071    and return an RTX for the result.
2072
2073    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2074    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2075    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2076
2077    If REVERSE is true, the extraction is to be done in reverse order.  */
2078
2079 static rtx
2080 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2081                          unsigned HOST_WIDE_INT bitpos, int unsignedp,
2082                          bool reverse)
2083 {
2084   unsigned int unit;
2085   unsigned int bitsdone = 0;
2086   rtx result = NULL_RTX;
2087   int first = 1;
2088
2089   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2090      much at a time.  */
2091   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2092     unit = BITS_PER_WORD;
2093   else
2094     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2095
2096   while (bitsdone < bitsize)
2097     {
2098       unsigned HOST_WIDE_INT thissize;
2099       rtx part, word;
2100       unsigned HOST_WIDE_INT thispos;
2101       unsigned HOST_WIDE_INT offset;
2102
2103       offset = (bitpos + bitsdone) / unit;
2104       thispos = (bitpos + bitsdone) % unit;
2105
2106       /* THISSIZE must not overrun a word boundary.  Otherwise,
2107          extract_fixed_bit_field will call us again, and we will mutually
2108          recurse forever.  */
2109       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2110       thissize = MIN (thissize, unit - thispos);
2111
2112       /* If OP0 is a register, then handle OFFSET here.
2113
2114          When handling multiword bitfields, extract_bit_field may pass
2115          down a word_mode SUBREG of a larger REG for a bitfield that actually
2116          crosses a word boundary.  Thus, for a SUBREG, we must find
2117          the current word starting from the base register.  */
2118       if (GET_CODE (op0) == SUBREG)
2119         {
2120           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2121           word = operand_subword_force (SUBREG_REG (op0), word_offset,
2122                                         GET_MODE (SUBREG_REG (op0)));
2123           offset = 0;
2124         }
2125       else if (REG_P (op0))
2126         {
2127           word = operand_subword_force (op0, offset, GET_MODE (op0));
2128           offset = 0;
2129         }
2130       else
2131         word = op0;
2132
2133       /* Extract the parts in bit-counting order,
2134          whose meaning is determined by BYTES_PER_UNIT.
2135          OFFSET is in UNITs, and UNIT is in bits.  */
2136       part = extract_fixed_bit_field (word_mode, word, thissize,
2137                                       offset * unit + thispos, 0, 1, reverse);
2138       bitsdone += thissize;
2139
2140       /* Shift this part into place for the result.  */
2141       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2142         {
2143           if (bitsize != bitsdone)
2144             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2145                                  bitsize - bitsdone, 0, 1);
2146         }
2147       else
2148         {
2149           if (bitsdone != thissize)
2150             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2151                                  bitsdone - thissize, 0, 1);
2152         }
2153
2154       if (first)
2155         result = part;
2156       else
2157         /* Combine the parts with bitwise or.  This works
2158            because we extracted each part as an unsigned bit field.  */
2159         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2160                                OPTAB_LIB_WIDEN);
2161
2162       first = 0;
2163     }
2164
2165   /* Unsigned bit field: we are done.  */
2166   if (unsignedp)
2167     return result;
2168   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2169   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2170                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2171   return expand_shift (RSHIFT_EXPR, word_mode, result,
2172                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2173 }
2174 \f
2175 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2176    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2177    MODE, fill the upper bits with zeros.  Fail if the layout of either
2178    mode is unknown (as for CC modes) or if the extraction would involve
2179    unprofitable mode punning.  Return the value on success, otherwise
2180    return null.
2181
2182    This is different from gen_lowpart* in these respects:
2183
2184      - the returned value must always be considered an rvalue
2185
2186      - when MODE is wider than SRC_MODE, the extraction involves
2187        a zero extension
2188
2189      - when MODE is smaller than SRC_MODE, the extraction involves
2190        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2191
2192    In other words, this routine performs a computation, whereas the
2193    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2194    operations.  */
2195
2196 rtx
2197 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2198 {
2199   machine_mode int_mode, src_int_mode;
2200
2201   if (mode == src_mode)
2202     return src;
2203
2204   if (CONSTANT_P (src))
2205     {
2206       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2207          fails, it will happily create (subreg (symbol_ref)) or similar
2208          invalid SUBREGs.  */
2209       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2210       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2211       if (ret)
2212         return ret;
2213
2214       if (GET_MODE (src) == VOIDmode
2215           || !validate_subreg (mode, src_mode, src, byte))
2216         return NULL_RTX;
2217
2218       src = force_reg (GET_MODE (src), src);
2219       return gen_rtx_SUBREG (mode, src, byte);
2220     }
2221
2222   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2223     return NULL_RTX;
2224
2225   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2226       && MODES_TIEABLE_P (mode, src_mode))
2227     {
2228       rtx x = gen_lowpart_common (mode, src);
2229       if (x)
2230         return x;
2231     }
2232
2233   src_int_mode = int_mode_for_mode (src_mode);
2234   int_mode = int_mode_for_mode (mode);
2235   if (src_int_mode == BLKmode || int_mode == BLKmode)
2236     return NULL_RTX;
2237
2238   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2239     return NULL_RTX;
2240   if (!MODES_TIEABLE_P (int_mode, mode))
2241     return NULL_RTX;
2242
2243   src = gen_lowpart (src_int_mode, src);
2244   src = convert_modes (int_mode, src_int_mode, src, true);
2245   src = gen_lowpart (mode, src);
2246   return src;
2247 }
2248 \f
2249 /* Add INC into TARGET.  */
2250
2251 void
2252 expand_inc (rtx target, rtx inc)
2253 {
2254   rtx value = expand_binop (GET_MODE (target), add_optab,
2255                             target, inc,
2256                             target, 0, OPTAB_LIB_WIDEN);
2257   if (value != target)
2258     emit_move_insn (target, value);
2259 }
2260
2261 /* Subtract DEC from TARGET.  */
2262
2263 void
2264 expand_dec (rtx target, rtx dec)
2265 {
2266   rtx value = expand_binop (GET_MODE (target), sub_optab,
2267                             target, dec,
2268                             target, 0, OPTAB_LIB_WIDEN);
2269   if (value != target)
2270     emit_move_insn (target, value);
2271 }
2272 \f
2273 /* Output a shift or rotate instruction for expression code CODE in mode
2274    MODE (possibly a vector mode), with SHIFTED being the rtx for the value
2275    to shift and AMOUNT the rtx for the amount to shift by.
2276    Store the result in the rtx TARGET, if that is convenient.
2277    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2278    Return the rtx for where the value is.  */
2279
2280 static rtx
2281 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2282                 rtx amount, rtx target, int unsignedp)
2283 {
2284   rtx op1, temp = 0;
2285   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2286   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2287   optab lshift_optab = ashl_optab;
2288   optab rshift_arith_optab = ashr_optab;
2289   optab rshift_uns_optab = lshr_optab;
2290   optab lrotate_optab = rotl_optab;
2291   optab rrotate_optab = rotr_optab;
2292   machine_mode op1_mode;
2293   machine_mode scalar_mode = mode;
2294   int attempt;
2295   bool speed = optimize_insn_for_speed_p ();
2296
2297   if (VECTOR_MODE_P (mode))
2298     scalar_mode = GET_MODE_INNER (mode);
2299   op1 = amount;
2300   op1_mode = GET_MODE (op1);
2301
2302   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2303      shift amount is a vector, use the vector/vector shift patterns.  */
2304   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2305     {
2306       lshift_optab = vashl_optab;
2307       rshift_arith_optab = vashr_optab;
2308       rshift_uns_optab = vlshr_optab;
2309       lrotate_optab = vrotl_optab;
2310       rrotate_optab = vrotr_optab;
2311     }
2312
2313   /* Previously detected shift-counts computed by NEGATE_EXPR
2314      and shifted in the other direction; but that does not work
2315      on all machines.  */
2316
2317   if (SHIFT_COUNT_TRUNCATED)
2318     {
2319       if (CONST_INT_P (op1)
2320           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2321               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2322         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2323                        % GET_MODE_BITSIZE (scalar_mode));
2324       else if (GET_CODE (op1) == SUBREG
2325                && subreg_lowpart_p (op1)
2326                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2327                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2328         op1 = SUBREG_REG (op1);
2329     }
2330
2331   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2332      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2333      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2334      amount instead.  */
2335   if (rotate
2336       && CONST_INT_P (op1)
2337       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2338                    GET_MODE_BITSIZE (scalar_mode) - 1))
2339     {
2340       op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2341       left = !left;
2342       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2343     }
2344
2345   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswap.
2346      Note that this is not the case for bigger values.  For instance a rotation
2347      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2348      0x04030201 (bswapsi).  Query and expand the bswap in MODE itself, not
2349      HImode: MODE may be a vector mode whose elements are 16 bits wide.  */
2350   if (rotate
2351       && CONST_INT_P (op1)
2352       && INTVAL (op1) == BITS_PER_UNIT
2353       && GET_MODE_SIZE (scalar_mode) == 2
2354       && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2355     return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2356
2357   if (op1 == const0_rtx)
2358     return shifted;
2359
2360   /* Check whether it's cheaper to implement a left shift by a constant
2361      bit count by a sequence of additions.  */
2362   if (code == LSHIFT_EXPR
2363       && CONST_INT_P (op1)
2364       && INTVAL (op1) > 0
2365       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2366       && INTVAL (op1) < MAX_BITS_PER_WORD
2367       && (shift_cost (speed, mode, INTVAL (op1))
2368           > INTVAL (op1) * add_cost (speed, mode))
2369       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2370     {
2371       int i;
2372       for (i = 0; i < INTVAL (op1); i++)
2373         {
2374           temp = force_reg (mode, shifted);
2375           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2376                                   unsignedp, OPTAB_LIB_WIDEN);
2377         }
2378       return shifted;
2379     }
2380
2381   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2382     {
2383       enum optab_methods methods;
2384
2385       if (attempt == 0)
2386         methods = OPTAB_DIRECT;
2387       else if (attempt == 1)
2388         methods = OPTAB_WIDEN;
2389       else
2390         methods = OPTAB_LIB_WIDEN;
2391
2392       if (rotate)
2393         {
2394           /* Widening does not work for rotation.  */
2395           if (methods == OPTAB_WIDEN)
2396             continue;
2397           else if (methods == OPTAB_LIB_WIDEN)
2398             {
2399               /* If we have been unable to open-code this by a rotation,
2400                  do it as the IOR of two shifts.  I.e., to rotate A
2401                  by N bits, compute
2402                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2403                  where C is the bitsize of A.
2404
2405                  It is theoretically possible that the target machine might
2406                  not be able to perform either shift and hence we would
2407                  be making two libcalls rather than just the one for the
2408                  shift (similarly if IOR could not be done).  We will allow
2409                  this extremely unlikely lossage to avoid complicating the
2410                  code below.  */
2411
2412               rtx subtarget = target == shifted ? 0 : target;
2413               rtx new_amount, other_amount;
2414               rtx temp1;
2415
2416               new_amount = op1;
2417               if (op1 == const0_rtx)
2418                 return shifted;
2419               else if (CONST_INT_P (op1))
2420                 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2421                                         - INTVAL (op1));
2422               else
2423                 {
2424                   other_amount
2425                     = simplify_gen_unary (NEG, GET_MODE (op1),
2426                                           op1, GET_MODE (op1));
2427                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2428                   other_amount
2429                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2430                                            gen_int_mode (mask, GET_MODE (op1)));
2431                 }
2432
2433               shifted = force_reg (mode, shifted);
2434
2435               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2436                                      mode, shifted, new_amount, 0, 1);
2437               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2438                                       mode, shifted, other_amount,
2439                                       subtarget, 1);
2440               return expand_binop (mode, ior_optab, temp, temp1, target,
2441                                    unsignedp, methods);
2442             }
2443
2444           temp = expand_binop (mode,
2445                                left ? lrotate_optab : rrotate_optab,
2446                                shifted, op1, target, unsignedp, methods);
2447         }
2448       else if (unsignedp)
2449         temp = expand_binop (mode,
2450                              left ? lshift_optab : rshift_uns_optab,
2451                              shifted, op1, target, unsignedp, methods);
2452
2453       /* Do arithmetic shifts.
2454          Also, if we are going to widen the operand, we can just as well
2455          use an arithmetic right-shift instead of a logical one.  */
2456       if (temp == 0 && ! rotate
2457           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2458         {
2459           enum optab_methods methods1 = methods;
2460
2461           /* If trying to widen a log shift to an arithmetic shift,
2462              don't accept an arithmetic shift of the same size.  */
2463           if (unsignedp)
2464             methods1 = OPTAB_MUST_WIDEN;
2465
2466           /* Arithmetic shift */
2467
2468           temp = expand_binop (mode,
2469                                left ? lshift_optab : rshift_arith_optab,
2470                                shifted, op1, target, unsignedp, methods1);
2471         }
2472
2473       /* We used to try extzv here for logical right shifts, but that was
2474          only useful for one machine, the VAX, and caused poor code
2475          generation there for lshrdi3, so the code was deleted and a
2476          define_expand for lshrsi3 was added to vax.md.  */
2477     }
2478
2479   gcc_assert (temp);
2480   return temp;
2481 }
2482
2483 /* Emit a shift or rotate of SHIFTED by the integer constant AMOUNT.
2484    CODE selects the operation and MODE is the mode to perform it in.
2485    AMOUNT is turned into an rtx with GEN_INT and passed to expand_shift_1.
2486    Store the result in the rtx TARGET, if that is convenient.
2487    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2488    Return the rtx for where the value is.  */
2489
2490 rtx
2491 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2492               int amount, rtx target, int unsignedp)
2493 {
2494   rtx count = GEN_INT (amount);
2495   return expand_shift_1 (code, mode, shifted, count, target, unsignedp);
2496 }
2497
2498 /* Emit a shift or rotate of SHIFTED by AMOUNT, where AMOUNT is a tree.
2499    CODE selects the operation and MODE is the mode to perform it in.
2500    AMOUNT is expanded with expand_normal, then expand_shift_1 does the work.
2501    Store the result in the rtx TARGET, if that is convenient.
2502    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2503    Return the rtx for where the value is.  */
2504
2505 rtx
2506 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2507                        tree amount, rtx target, int unsignedp)
2508 {
2509   rtx count = expand_normal (amount);
2510   return expand_shift_1 (code, mode, shifted, count, target, unsignedp);
2511 }
2512
2513 \f
2514 /* Indicates the type of fixup needed after a constant multiplication
2515    (selected by choose_mult_variant).  basic_variant means no fixup is
2516    needed, negate_variant means that the result should be negated, and
2517    add_variant means that the multiplicand should be added to the result.  */
2518 enum mult_variant {basic_variant, negate_variant, add_variant};
2519 /* Forward declarations of file-local (static) helper functions.  */
2520 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2521                         const struct mult_cost *, machine_mode mode);
2522 static bool choose_mult_variant (machine_mode, HOST_WIDE_INT,
2523                                  struct algorithm *, enum mult_variant *, int);
2524 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2525                               const struct algorithm *, enum mult_variant);
2526 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2527 static rtx extract_high_half (machine_mode, rtx);
2528 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2529 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2530                                        int, int);
2531 /* Compute the best algorithm for multiplying by T and store it in *ALG_OUT.
2532    The algorithm must cost less than COST_LIMIT.
2533    If ALG_OUT->cost >= COST_LIMIT, no algorithm was found and all
2534    other fields of the returned struct are undefined.
2535    MODE is the machine mode of the multiplication.  */
2536
2537 static void
2538 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2539             const struct mult_cost *cost_limit, machine_mode mode)
2540 {
2541   int m;
2542   struct algorithm *alg_in, *best_alg;
2543   struct mult_cost best_cost;
2544   struct mult_cost new_limit;
2545   int op_cost, op_latency;
2546   unsigned HOST_WIDE_INT orig_t = t;
2547   unsigned HOST_WIDE_INT q;
2548   int maxm, hash_index;
2549   bool cache_hit = false;
2550   enum alg_code cache_alg = alg_zero;
2551   bool speed = optimize_insn_for_speed_p ();
2552   machine_mode imode;
2553   struct alg_hash_entry *entry_ptr;
2554
2555   /* Indicate that no algorithm is yet found.  If no algorithm
2556      is found, this value will be returned and indicate failure.  */
2557   alg_out->cost.cost = cost_limit->cost + 1;
2558   alg_out->cost.latency = cost_limit->latency + 1;
2559
2560   if (cost_limit->cost < 0
2561       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2562     return;
2563
2564   /* Be prepared for vector modes.  */
2565   imode = GET_MODE_INNER (mode);
2566
2567   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2568
2569   /* Restrict the bits of "t" to the multiplication's mode.  */
2570   t &= GET_MODE_MASK (imode);
2571
2572   /* t == 1 can be done in zero cost.  */
2573   if (t == 1)
2574     {
2575       alg_out->ops = 1;
2576       alg_out->cost.cost = 0;
2577       alg_out->cost.latency = 0;
2578       alg_out->op[0] = alg_m;
2579       return;
2580     }
2581
2582   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2583      fail now.  */
2584   if (t == 0)
2585     {
2586       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2587         return;
2588       else
2589         {
2590           alg_out->ops = 1;
2591           alg_out->cost.cost = zero_cost (speed);
2592           alg_out->cost.latency = zero_cost (speed);
2593           alg_out->op[0] = alg_zero;
2594           return;
2595         }
2596     }
2597
2598   /* We'll be needing a couple extra algorithm structures now.  */
2599
2600   alg_in = XALLOCA (struct algorithm);
2601   best_alg = XALLOCA (struct algorithm);
2602   best_cost = *cost_limit;
2603
2604   /* Compute the hash index.  */
2605   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2606
2607   /* See if we already know what to do for T: the entry must have been
2608      recorded for the same T, MODE and SPEED setting.  */
2609   entry_ptr = alg_hash_entry_ptr (hash_index);
2610   if (entry_ptr->t == t
2611       && entry_ptr->mode == mode
2612       && entry_ptr->speed == speed
2613       && entry_ptr->alg != alg_unknown)
2614     {
2615       cache_alg = entry_ptr->alg;
2616
2617       if (cache_alg == alg_impossible)
2618         {
2619           /* The cache tells us that it's impossible to synthesize
2620              multiplication by T within entry_ptr->cost.  */
2621           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2622             /* COST_LIMIT is at least as restrictive as the one
2623                recorded in the hash table, in which case we have no
2624                hope of synthesizing a multiplication.  Just
2625                return.  */
2626             return;
2627
2628           /* If we get here, COST_LIMIT is less restrictive than the
2629              one recorded in the hash table, so we may be able to
2630              synthesize a multiplication.  Proceed as if we didn't
2631              have the cache entry.  */
2632         }
2633       else
2634         {
2635           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2636             /* The cached algorithm shows that this multiplication
2637                requires more cost than COST_LIMIT.  Just return.  This
2638                way, we don't clobber this cache entry with
2639                alg_impossible but retain useful information.  */
2640             return;
2641
2642           cache_hit = true;
2643
2644           switch (cache_alg)
2645             {
2646             case alg_shift:
2647               goto do_alg_shift;
2648
2649             case alg_add_t_m2:
2650             case alg_sub_t_m2:
2651               goto do_alg_addsub_t_m2;
2652
2653             case alg_add_factor:
2654             case alg_sub_factor:
2655               goto do_alg_addsub_factor;
2656
2657             case alg_add_t2_m:
2658               goto do_alg_add_t2_m;
2659
2660             case alg_sub_t2_m:
2661               goto do_alg_sub_t2_m;
2662
2663             default:
2664               gcc_unreachable ();
2665             }
2666         }
2667     }
2668
2669   /* If we have a group of zero bits at the low-order part of T, try
2670      multiplying by the remaining bits and then doing a shift.  */
2671
2672   if ((t & 1) == 0)
2673     {
2674     do_alg_shift:
2675       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2676       if (m < maxm)
2677         {
2678           q = t >> m;
2679           /* The function expand_shift will choose between a shift and
2680              a sequence of additions, so the observed cost is given as
2681              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2682           op_cost = m * add_cost (speed, mode);
2683           if (shift_cost (speed, mode, m) < op_cost)
2684             op_cost = shift_cost (speed, mode, m);
2685           new_limit.cost = best_cost.cost - op_cost;
2686           new_limit.latency = best_cost.latency - op_cost;
2687           synth_mult (alg_in, q, &new_limit, mode);
2688
2689           alg_in->cost.cost += op_cost;
2690           alg_in->cost.latency += op_cost;
2691           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2692             {
2693               best_cost = alg_in->cost;
2694               std::swap (alg_in, best_alg);
2695               best_alg->log[best_alg->ops] = m;
2696               best_alg->op[best_alg->ops] = alg_shift;
2697             }
2698
2699           /* See if treating ORIG_T as a signed number yields a better
2700              sequence.  Try this sequence only for a negative ORIG_T
2701              as it would be useless for a non-negative ORIG_T.  */
2702           if ((HOST_WIDE_INT) orig_t < 0)
2703             {
2704               /* Shift ORIG_T as follows because a right shift of a
2705                  negative-valued signed type is implementation
2706                  defined.  */
2707               q = ~(~orig_t >> m);
2708               /* The function expand_shift will choose between a shift
2709                  and a sequence of additions, so the observed cost is
2710                  given as MIN (m * add_cost(speed, mode),
2711                  shift_cost(speed, mode, m)).  */
2712               op_cost = m * add_cost (speed, mode);
2713               if (shift_cost (speed, mode, m) < op_cost)
2714                 op_cost = shift_cost (speed, mode, m);
2715               new_limit.cost = best_cost.cost - op_cost;
2716               new_limit.latency = best_cost.latency - op_cost;
2717               synth_mult (alg_in, q, &new_limit, mode);
2718
2719               alg_in->cost.cost += op_cost;
2720               alg_in->cost.latency += op_cost;
2721               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2722                 {
2723                   best_cost = alg_in->cost;
2724                   std::swap (alg_in, best_alg);
2725                   best_alg->log[best_alg->ops] = m;
2726                   best_alg->op[best_alg->ops] = alg_shift;
2727                 }
2728             }
2729         }
2730       if (cache_hit)
2731         goto done;
2732     }
2733
2734   /* If we have an odd number, add or subtract one.  */
2735   if ((t & 1) != 0)
2736     {
2737       unsigned HOST_WIDE_INT w;
2738
2739     do_alg_addsub_t_m2:
2740       for (w = 1; (w & t) != 0; w <<= 1)
2741         ;
2742       /* If T was -1, then W will be zero after the loop.  This is another
2743          case where T ends with ...111.  Handling this with (T + 1) and
2744          subtract 1 produces slightly better code and results in algorithm
2745          selection much faster than treating it like the ...0111 case
2746          below.  */
2747       if (w == 0
2748           || (w > 2
2749               /* Reject the case where t is 3.
2750                  Thus we prefer addition in that case.  */
2751               && t != 3))
2752         {
2753           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2754
2755           op_cost = add_cost (speed, mode);
2756           new_limit.cost = best_cost.cost - op_cost;
2757           new_limit.latency = best_cost.latency - op_cost;
2758           synth_mult (alg_in, t + 1, &new_limit, mode);
2759
2760           alg_in->cost.cost += op_cost;
2761           alg_in->cost.latency += op_cost;
2762           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2763             {
2764               best_cost = alg_in->cost;
2765               std::swap (alg_in, best_alg);
2766               best_alg->log[best_alg->ops] = 0;
2767               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2768             }
2769         }
2770       else
2771         {
2772           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2773
2774           op_cost = add_cost (speed, mode);
2775           new_limit.cost = best_cost.cost - op_cost;
2776           new_limit.latency = best_cost.latency - op_cost;
2777           synth_mult (alg_in, t - 1, &new_limit, mode);
2778
2779           alg_in->cost.cost += op_cost;
2780           alg_in->cost.latency += op_cost;
2781           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2782             {
2783               best_cost = alg_in->cost;
2784               std::swap (alg_in, best_alg);
2785               best_alg->log[best_alg->ops] = 0;
2786               best_alg->op[best_alg->ops] = alg_add_t_m2;
2787             }
2788         }
2789
2790       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2791          quickly with a - a * n for some appropriate constant n.  */
2792       m = exact_log2 (-orig_t + 1);
2793       if (m >= 0 && m < maxm)
2794         {
2795           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2796           /* If the target has a cheap shift-and-subtract insn use
2797              that in preference to a shift insn followed by a sub insn.
2798              Assume that the shift-and-sub is "atomic" with a latency
2799              equal to its cost, otherwise assume that on superscalar
2800              hardware the shift may be executed concurrently with the
2801              earlier steps in the algorithm.  */
2802           if (shiftsub1_cost (speed, mode, m) <= op_cost)
2803             {
2804               op_cost = shiftsub1_cost (speed, mode, m);
2805               op_latency = op_cost;
2806             }
2807           else
2808             op_latency = add_cost (speed, mode);
2809
2810           new_limit.cost = best_cost.cost - op_cost;
2811           new_limit.latency = best_cost.latency - op_latency;
2812           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2813                       &new_limit, mode);
2814
2815           alg_in->cost.cost += op_cost;
2816           alg_in->cost.latency += op_latency;
2817           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2818             {
2819               best_cost = alg_in->cost;
2820               std::swap (alg_in, best_alg);
2821               best_alg->log[best_alg->ops] = m;
2822               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2823             }
2824         }
2825
2826       if (cache_hit)
2827         goto done;
2828     }
2829
2830   /* Look for factors of t of the form
2831      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2832      If we find such a factor, we can multiply by t using an algorithm that
2833      multiplies by q, shift the result by m and add/subtract it to itself.
2834
2835      We search for large factors first and loop down, even if large factors
2836      are less probable than small; if we find a large factor we will find a
2837      good sequence quickly, and therefore be able to prune (by decreasing
2838      COST_LIMIT) the search.  */
2839
2840  do_alg_addsub_factor:
2841   for (m = floor_log2 (t - 1); m >= 2; m--)
2842     {
2843       unsigned HOST_WIDE_INT d;
2844
2845       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2846       if (t % d == 0 && t > d && m < maxm
2847           && (!cache_hit || cache_alg == alg_add_factor))
2848         {
2849           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2850           if (shiftadd_cost (speed, mode, m) <= op_cost)
2851             op_cost = shiftadd_cost (speed, mode, m);
2852
2853           op_latency = op_cost;
2854
2855
2856           new_limit.cost = best_cost.cost - op_cost;
2857           new_limit.latency = best_cost.latency - op_latency;
2858           synth_mult (alg_in, t / d, &new_limit, mode);
2859
2860           alg_in->cost.cost += op_cost;
2861           alg_in->cost.latency += op_latency;
2862           if (alg_in->cost.latency < op_cost)
2863             alg_in->cost.latency = op_cost;
2864           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2865             {
2866               best_cost = alg_in->cost;
2867               std::swap (alg_in, best_alg);
2868               best_alg->log[best_alg->ops] = m;
2869               best_alg->op[best_alg->ops] = alg_add_factor;
2870             }
2871           /* Other factors will have been taken care of in the recursion.  */
2872           break;
2873         }
2874
2875       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2876       if (t % d == 0 && t > d && m < maxm
2877           && (!cache_hit || cache_alg == alg_sub_factor))
2878         {
2879           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2880           if (shiftsub0_cost (speed, mode, m) <= op_cost)
2881             op_cost = shiftsub0_cost (speed, mode, m);
2882
2883           op_latency = op_cost;
2884
2885           new_limit.cost = best_cost.cost - op_cost;
2886           new_limit.latency = best_cost.latency - op_latency;
2887           synth_mult (alg_in, t / d, &new_limit, mode);
2888
2889           alg_in->cost.cost += op_cost;
2890           alg_in->cost.latency += op_latency;
2891           if (alg_in->cost.latency < op_cost)
2892             alg_in->cost.latency = op_cost;
2893           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2894             {
2895               best_cost = alg_in->cost;
2896               std::swap (alg_in, best_alg);
2897               best_alg->log[best_alg->ops] = m;
2898               best_alg->op[best_alg->ops] = alg_sub_factor;
2899             }
2900           break;
2901         }
2902     }
2903   if (cache_hit)
2904     goto done;
2905
2906   /* Try shift-and-add (load effective address) instructions,
2907      i.e. do a*3, a*5, a*9.  */
2908   if ((t & 1) != 0)
2909     {
2910     do_alg_add_t2_m:
2911       q = t - 1;
2912       q = q & -q;
2913       m = exact_log2 (q);
2914       if (m >= 0 && m < maxm)
2915         {
2916           op_cost = shiftadd_cost (speed, mode, m);
2917           new_limit.cost = best_cost.cost - op_cost;
2918           new_limit.latency = best_cost.latency - op_cost;
2919           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2920
2921           alg_in->cost.cost += op_cost;
2922           alg_in->cost.latency += op_cost;
2923           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2924             {
2925               best_cost = alg_in->cost;
2926               std::swap (alg_in, best_alg);
2927               best_alg->log[best_alg->ops] = m;
2928               best_alg->op[best_alg->ops] = alg_add_t2_m;
2929             }
2930         }
2931       if (cache_hit)
2932         goto done;
2933
2934     do_alg_sub_t2_m:
2935       q = t + 1;
2936       q = q & -q;
2937       m = exact_log2 (q);
2938       if (m >= 0 && m < maxm)
2939         {
2940           op_cost = shiftsub0_cost (speed, mode, m);
2941           new_limit.cost = best_cost.cost - op_cost;
2942           new_limit.latency = best_cost.latency - op_cost;
2943           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2944
2945           alg_in->cost.cost += op_cost;
2946           alg_in->cost.latency += op_cost;
2947           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2948             {
2949               best_cost = alg_in->cost;
2950               std::swap (alg_in, best_alg);
2951               best_alg->log[best_alg->ops] = m;
2952               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2953             }
2954         }
2955       if (cache_hit)
2956         goto done;
2957     }
2958
2959  done:
2960   /* If best_cost has not decreased, we have not found any algorithm.  */
2961   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2962     {
2963       /* We failed to find an algorithm.  Record alg_impossible for
2964          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2965          we are asked to find an algorithm for T within the same or
2966          lower COST_LIMIT, we can immediately return to the
2967          caller.  */
2968       entry_ptr->t = t;
2969       entry_ptr->mode = mode;
2970       entry_ptr->speed = speed;
2971       entry_ptr->alg = alg_impossible;
2972       entry_ptr->cost = *cost_limit;
2973       return;
2974     }
2975
2976   /* Cache the result.  */
2977   if (!cache_hit)
2978     {
2979       entry_ptr->t = t;
2980       entry_ptr->mode = mode;
2981       entry_ptr->speed = speed;
2982       entry_ptr->alg = best_alg->op[best_alg->ops];
2983       entry_ptr->cost.cost = best_cost.cost;
2984       entry_ptr->cost.latency = best_cost.latency;
2985     }
2986
2987   /* If we are getting a too long sequence for `struct algorithm'
2988      to record, make this search fail.  */
2989   if (best_alg->ops == MAX_BITS_PER_WORD)
2990     return;
2991
2992   /* Copy the algorithm from temporary space to the space at alg_out.
2993      We avoid using structure assignment because the majority of
2994      best_alg is normally undefined, and this is a critical function.  */
2995   alg_out->ops = best_alg->ops + 1;
2996   alg_out->cost = best_cost;
2997   memcpy (alg_out->op, best_alg->op,
2998           alg_out->ops * sizeof *alg_out->op);
2999   memcpy (alg_out->log, best_alg->log,
3000           alg_out->ops * sizeof *alg_out->log);
3001 }
3002 \f
3003 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3004    Try three variations:
3005
3006        - a shift/add sequence based on VAL itself
3007        - a shift/add sequence based on -VAL, followed by a negation
3008        - a shift/add sequence based on VAL - 1, followed by an addition.
3009
3010    Return true if the cheapest of these cost less than MULT_COST,
3011    describing the algorithm in *ALG and final fixup in *VARIANT.  */
3012
3013 static bool
3014 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3015                      struct algorithm *alg, enum mult_variant *variant,
3016                      int mult_cost)
3017 {
3018   struct algorithm alg2;
3019   struct mult_cost limit;
3020   int op_cost;
3021   bool speed = optimize_insn_for_speed_p ();
3022
3023   /* Fail quickly for impossible bounds.  */
3024   if (mult_cost < 0)
3025     return false;
3026
3027   /* Ensure that mult_cost provides a reasonable upper bound.
3028      Any constant multiplication can be performed with less
3029      than 2 * bits additions.  */
3030   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3031   if (mult_cost > op_cost)
3032     mult_cost = op_cost;
3033
3034   *variant = basic_variant;
3035   limit.cost = mult_cost;
3036   limit.latency = mult_cost;
3037   synth_mult (alg, val, &limit, mode);
3038
3039   /* This works only if the inverted value actually fits in an
3040      `unsigned int'.  Negate as unsigned to avoid signed overflow.  */
3041   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3042     {
3043       op_cost = neg_cost (speed, mode);
3044       if (MULT_COST_LESS (&alg->cost, mult_cost))
3045         {
3046           limit.cost = alg->cost.cost - op_cost;
3047           limit.latency = alg->cost.latency - op_cost;
3048         }
3049       else
3050         {
3051           limit.cost = mult_cost - op_cost;
3052           limit.latency = mult_cost - op_cost;
3053         }
3054
3055       synth_mult (&alg2, -(unsigned HOST_WIDE_INT) val, &limit, mode);
3056       alg2.cost.cost += op_cost;
3057       alg2.cost.latency += op_cost;
3058       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3059         *alg = alg2, *variant = negate_variant;
3060     }
3061
3062   /* Useful for division-by-constant.  VAL - 1 is computed as unsigned.  */
3063   op_cost = add_cost (speed, mode);
3064   if (MULT_COST_LESS (&alg->cost, mult_cost))
3065     {
3066       limit.cost = alg->cost.cost - op_cost;
3067       limit.latency = alg->cost.latency - op_cost;
3068     }
3069   else
3070     {
3071       limit.cost = mult_cost - op_cost;
3072       limit.latency = mult_cost - op_cost;
3073     }
3074
3075   synth_mult (&alg2, (unsigned HOST_WIDE_INT) val - 1, &limit, mode);
3076   alg2.cost.cost += op_cost;
3077   alg2.cost.latency += op_cost;
3078   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3079     *alg = alg2, *variant = add_variant;
3080
3081   return MULT_COST_LESS (&alg->cost, mult_cost);
3082 }
3083
3084 /* A subroutine of expand_mult, used for constant multiplications.
3085    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3086    convenient.  Use the shift/add sequence described by ALG and apply
3087    the final fixup specified by VARIANT.  */
3088
3089 static rtx
3090 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3091                    rtx target, const struct algorithm *alg,
3092                    enum mult_variant variant)
3093 {
3094   HOST_WIDE_INT val_so_far;
3095   rtx_insn *insn;
3096   rtx accum, tem;
3097   int opno;
3098   machine_mode nmode;
3099
3100   /* Avoid referencing memory over and over and invalid sharing
3101      on SUBREGs.  */
3102   op0 = force_reg (mode, op0);
3103
3104   /* ACCUM starts out either as OP0 or as a zero, depending on
3105      the first operation.  */
3106
3107   if (alg->op[0] == alg_zero)
3108     {
3109       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3110       val_so_far = 0;
3111     }
3112   else if (alg->op[0] == alg_m)
3113     {
3114       accum = copy_to_mode_reg (mode, op0);
3115       val_so_far = 1;
3116     }
3117   else
3118     gcc_unreachable ();
3119
3120   for (opno = 1; opno < alg->ops; opno++)
3121     {
3122       int log = alg->log[opno];
3123       rtx shift_subtarget = optimize ? 0 : accum;
3124       rtx add_target
3125         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3126            && !optimize)
3127           ? target : 0;
3128       rtx accum_target = optimize ? 0 : accum;
3129       rtx accum_inner;
3130
3131       switch (alg->op[opno])
3132         {
3133         case alg_shift:
3134           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3135           /* REG_EQUAL note will be attached to the following insn.  */
3136           emit_move_insn (accum, tem);
3137           val_so_far <<= log;
3138           break;
3139
3140         case alg_add_t_m2:
3141           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3142           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3143                                  add_target ? add_target : accum_target);
3144           val_so_far += (HOST_WIDE_INT) 1 << log;
3145           break;
3146
3147         case alg_sub_t_m2:
3148           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3149           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3150                                  add_target ? add_target : accum_target);
3151           val_so_far -= (HOST_WIDE_INT) 1 << log;
3152           break;
3153
3154         case alg_add_t2_m:
3155           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3156                                 log, shift_subtarget, 0);
3157           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3158                                  add_target ? add_target : accum_target);
3159           val_so_far = (val_so_far << log) + 1;
3160           break;
3161
3162         case alg_sub_t2_m:
3163           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3164                                 log, shift_subtarget, 0);
3165           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3166                                  add_target ? add_target : accum_target);
3167           val_so_far = (val_so_far << log) - 1;
3168           break;
3169
3170         case alg_add_factor:
3171           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3172           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3173                                  add_target ? add_target : accum_target);
3174           val_so_far += val_so_far << log;
3175           break;
3176
3177         case alg_sub_factor:
3178           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3179           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3180                                  (add_target
3181                                   ? add_target : (optimize ? 0 : tem)));
3182           val_so_far = (val_so_far << log) - val_so_far;
3183           break;
3184
3185         default:
3186           gcc_unreachable ();
3187         }
3188
3189       if (SCALAR_INT_MODE_P (mode))
3190         {
3191           /* Write a REG_EQUAL note on the last insn so that we can cse
3192              multiplication sequences.  Note that if ACCUM is a SUBREG,
3193              we've set the inner register and must properly indicate that.  */
3194           tem = op0, nmode = mode;
3195           accum_inner = accum;
3196           if (GET_CODE (accum) == SUBREG)
3197             {
3198               accum_inner = SUBREG_REG (accum);
3199               nmode = GET_MODE (accum_inner);
3200               tem = gen_lowpart (nmode, op0);
3201             }
3202
3203           insn = get_last_insn ();
3204           set_dst_reg_note (insn, REG_EQUAL,
3205                             gen_rtx_MULT (nmode, tem,
3206                                           gen_int_mode (val_so_far, nmode)),
3207                             accum_inner);
3208         }
3209     }
3210
3211   if (variant == negate_variant)
3212     {
3213       val_so_far = -val_so_far;
3214       accum = expand_unop (mode, neg_optab, accum, target, 0);
3215     }
3216   else if (variant == add_variant)
3217     {
3218       val_so_far = val_so_far + 1;
3219       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3220     }
3221
3222   /* Compare only the bits of val and val_so_far that are significant
3223      in the result mode, to avoid sign-/zero-extension confusion.  */
3224   nmode = GET_MODE_INNER (mode);
3225   val &= GET_MODE_MASK (nmode);
3226   val_so_far &= GET_MODE_MASK (nmode);
3227   gcc_assert (val == val_so_far);
3228
3229   return accum;
3230 }
3231
3232 /* Perform a multiplication and return an rtx for the result.
3233    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3234    TARGET is a suggestion for where to store the result (an rtx).
3235
3236    We check specially for a constant integer as OP1.
3237    If you want this check for OP0 as well, then before calling
3238    you should swap the two operands if OP0 would be constant.  */
3239
3240 rtx
3241 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3242              int unsignedp)
3243 {
3244   enum mult_variant variant;
3245   struct algorithm algorithm;
3246   rtx scalar_op1;
3247   int max_cost;
3248   bool speed = optimize_insn_for_speed_p ();
3249   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3250
3251   if (CONSTANT_P (op0))
3252     std::swap (op0, op1);
3253
3254   /* For vectors, there are several simplifications that can be made if
3255      all elements of the vector constant are identical.  */
3256   scalar_op1 = unwrap_const_vec_duplicate (op1);
3257
3258   if (INTEGRAL_MODE_P (mode))
3259     {
3260       rtx fake_reg;
3261       HOST_WIDE_INT coeff;
3262       bool is_neg;
3263       int mode_bitsize;
3264
3265       if (op1 == CONST0_RTX (mode))
3266         return op1;
3267       if (op1 == CONST1_RTX (mode))
3268         return op0;
3269       if (op1 == CONSTM1_RTX (mode))
3270         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3271                             op0, target, 0);
3272
3273       if (do_trapv)
3274         goto skip_synth;
3275
3276       /* If mode is integer vector mode, check if the backend supports
3277          vector lshift (by scalar or vector) at all.  If not, we can't use
3278          synthetized multiply.  */
3279       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3280           && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3281           && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3282         goto skip_synth;
3283
3284       /* These are the operations that are potentially turned into
3285          a sequence of shifts and additions.  */
3286       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3287
3288       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3289          less than or equal in size to `unsigned int' this doesn't matter.
3290          If the mode is larger than `unsigned int', then synth_mult works
3291          only if the constant value exactly fits in an `unsigned int' without
3292          any truncation.  This means that multiplying by negative values does
3293          not work; results are off by 2^32 on a 32 bit machine.  */
3294       if (CONST_INT_P (scalar_op1))
3295         {
3296           coeff = INTVAL (scalar_op1);
3297           is_neg = coeff < 0;
3298         }
3299 #if TARGET_SUPPORTS_WIDE_INT
3300       else if (CONST_WIDE_INT_P (scalar_op1))
3301 #else
3302       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3303 #endif
3304         {
3305           int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3306           /* Perfect power of 2 (other than 1, which is handled above).  */
3307           if (shift > 0)
3308             return expand_shift (LSHIFT_EXPR, mode, op0,
3309                                  shift, target, unsignedp);
3310           else
3311             goto skip_synth;
3312         }
3313       else
3314         goto skip_synth;
3315
3316       /* We used to test optimize here, on the grounds that it's better to
3317          produce a smaller program when -O is not used.  But this causes
3318          such a terrible slowdown sometimes that it seems better to always
3319          use synth_mult.  */
3320
3321       /* Special case powers of two.  */
3322       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3323           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3324         return expand_shift (LSHIFT_EXPR, mode, op0,
3325                              floor_log2 (coeff), target, unsignedp);
3326
3327       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3328
3329       /* Attempt to handle multiplication of DImode values by negative
3330          coefficients, by performing the multiplication by a positive
3331          multiplier and then inverting the result.  */
3332       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3333         {
3334           /* Its safe to use -coeff even for INT_MIN, as the
3335              result is interpreted as an unsigned coefficient.
3336              Exclude cost of op0 from max_cost to match the cost
3337              calculation of the synth_mult.  */
3338           coeff = -(unsigned HOST_WIDE_INT) coeff;
3339           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3340                                     mode, speed)
3341                       - neg_cost (speed, mode));
3342           if (max_cost <= 0)
3343             goto skip_synth;
3344
3345           /* Special case powers of two.  */
3346           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3347             {
3348               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3349                                        floor_log2 (coeff), target, unsignedp);
3350               return expand_unop (mode, neg_optab, temp, target, 0);
3351             }
3352
3353           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3354                                    max_cost))
3355             {
3356               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3357                                             &algorithm, variant);
3358               return expand_unop (mode, neg_optab, temp, target, 0);
3359             }
3360           goto skip_synth;
3361         }
3362
3363       /* Exclude cost of op0 from max_cost to match the cost
3364          calculation of the synth_mult.  */
3365       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3366       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3367         return expand_mult_const (mode, op0, coeff, target,
3368                                   &algorithm, variant);
3369     }
3370  skip_synth:
3371
3372   /* Expand x*2.0 as x+x.  */
3373   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3374       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3375     {
3376       op0 = force_reg (GET_MODE (op0), op0);
3377       return expand_binop (mode, add_optab, op0, op0,
3378                            target, unsignedp, OPTAB_LIB_WIDEN);
3379     }
3380
3381   /* This used to use umul_optab if unsigned, but for non-widening multiply
3382      there is no difference between signed and unsigned.  */
3383   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3384                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3385   gcc_assert (op0);
3386   return op0;
3387 }
3388
3389 /* Return a cost estimate for multiplying a register by the given
3390    COEFFicient in the given MODE and SPEED.  */
3391
3392 int
3393 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3394 {
3395   int max_cost;
3396   struct algorithm algorithm;
3397   enum mult_variant variant;
3398
3399   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3400   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3401                            mode, speed);
3402   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3403     return algorithm.cost.cost;
3404   else
3405     return max_cost;
3406 }
3407
3408 /* Perform a widening multiplication and return an rtx for the result.
3409    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3410    TARGET is a suggestion for where to store the result (an rtx).
3411    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3412    or smul_widen_optab.
3413
3414    We check specially for a constant integer as OP1, comparing the
3415    cost of a widening multiply against the cost of a sequence of shifts
3416    and adds.  */
3417
3418 rtx
3419 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3420                       int unsignedp, optab this_optab)
3421 {
3422   bool speed = optimize_insn_for_speed_p ();
3423   rtx cop1;
3424
3425   if (CONST_INT_P (op1)
3426       && GET_MODE (op0) != VOIDmode
3427       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3428                                 this_optab == umul_widen_optab))
3429       && CONST_INT_P (cop1)
3430       && (INTVAL (cop1) >= 0
3431           || HWI_COMPUTABLE_MODE_P (mode)))
3432     {
3433       HOST_WIDE_INT coeff = INTVAL (cop1);
3434       int max_cost;
3435       enum mult_variant variant;
3436       struct algorithm algorithm;
3437
3438       if (coeff == 0)
3439         return CONST0_RTX (mode);
3440
3441       /* Special case powers of two.  */
3442       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3443         {
3444           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3445           return expand_shift (LSHIFT_EXPR, mode, op0,
3446                                floor_log2 (coeff), target, unsignedp);
3447         }
3448
3449       /* Exclude cost of op0 from max_cost to match the cost
3450          calculation of the synth_mult.  */
3451       max_cost = mul_widen_cost (speed, mode);
3452       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3453                                max_cost))
3454         {
3455           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3456           return expand_mult_const (mode, op0, coeff, target,
3457                                     &algorithm, variant);
3458         }
3459     }
3460   return expand_binop (mode, this_optab, op0, op1, target,
3461                        unsignedp, OPTAB_LIB_WIDEN);
3462 }
3463 \f
3464 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3465    replace division by D, and put the least significant N bits of the result
3466    in *MULTIPLIER_PTR and return the most significant bit.
3467
3468    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3469    needed precision is in PRECISION (should be <= N).
3470
3471    PRECISION should be as small as possible so this function can choose
3472    multiplier more freely.
3473
3474    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3475    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3476
3477    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3478    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3479
3480 unsigned HOST_WIDE_INT
3481 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3482                    unsigned HOST_WIDE_INT *multiplier_ptr,
3483                    int *post_shift_ptr, int *lgup_ptr)
3484 {
3485   int lgup, post_shift;
3486   int pow, pow2;
3487
3488   /* lgup = ceil(log2(divisor)); */
3489   lgup = ceil_log2 (d);
3490
3491   gcc_assert (lgup <= n);
3492
3493   pow = n + lgup;
3494   pow2 = n + lgup - precision;
3495
3496   /* mlow = 2^(N + lgup)/d */
3497   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3498   wide_int mlow = wi::udiv_trunc (val, d);
3499
3500   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3501   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3502   wide_int mhigh = wi::udiv_trunc (val, d);
3503
3504   /* If precision == N, then mlow, mhigh exceed 2^N
3505      (but they do not exceed 2^(N+1)).  */
3506
3507   /* Reduce to lowest terms.  */
3508   for (post_shift = lgup; post_shift > 0; post_shift--)
3509     {
3510       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3511                                                        HOST_BITS_PER_WIDE_INT);
3512       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3513                                                        HOST_BITS_PER_WIDE_INT);
3514       if (ml_lo >= mh_lo)
3515         break;
3516
3517       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3518       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3519     }
3520
3521   *post_shift_ptr = post_shift;
3522   *lgup_ptr = lgup;
3523   if (n < HOST_BITS_PER_WIDE_INT)
3524     {
3525       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3526       *multiplier_ptr = mhigh.to_uhwi () & mask;
3527       return mhigh.to_uhwi () >= mask;
3528     }
3529   else
3530     {
3531       *multiplier_ptr = mhigh.to_uhwi ();
3532       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3533     }
3534 }
3535
3536 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3537    congruent to 1 (mod 2**N).  */
3538
3539 static unsigned HOST_WIDE_INT
3540 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3541 {
3542   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3543
3544   /* The algorithm notes that the choice y = x satisfies
3545      x*y == 1 mod 2^3, since x is assumed odd.
3546      Each iteration doubles the number of bits of significance in y.  */
3547
3548   unsigned HOST_WIDE_INT mask;
3549   unsigned HOST_WIDE_INT y = x;
3550   int nbit = 3;
3551
3552   mask = (n == HOST_BITS_PER_WIDE_INT
3553           ? ~(unsigned HOST_WIDE_INT) 0
3554           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3555
3556   while (nbit < n)
3557     {
3558       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3559       nbit *= 2;
3560     }
3561   return y;
3562 }
3563
3564 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3565    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3566    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3567    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3568    become signed.
3569
3570    The result is put in TARGET if that is convenient.
3571
3572    MODE is the mode of operation.  */
3573
3574 rtx
3575 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3576                              rtx op1, rtx target, int unsignedp)
3577 {
3578   rtx tem;
3579   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3580
3581   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3582                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3583   tem = expand_and (mode, tem, op1, NULL_RTX);
3584   adj_operand
3585     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3586                      adj_operand);
3587
3588   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3589                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3590   tem = expand_and (mode, tem, op0, NULL_RTX);
3591   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3592                           target);
3593
3594   return target;
3595 }
3596
3597 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3598
3599 static rtx
3600 extract_high_half (machine_mode mode, rtx op)
3601 {
3602   machine_mode wider_mode;
3603
3604   if (mode == word_mode)
3605     return gen_highpart (mode, op);
3606
3607   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3608
3609   wider_mode = GET_MODE_WIDER_MODE (mode);
3610   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3611                      GET_MODE_BITSIZE (mode), 0, 1);
3612   return convert_modes (mode, wider_mode, op, 0);
3613 }
3614
3615 /* Like expmed_mult_highpart, but only consider using a multiplication
3616    optab.  OP1 is an rtx for the constant operand.  */
3617
3618 static rtx
3619 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3620                             rtx target, int unsignedp, int max_cost)
3621 {
3622   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3623   machine_mode wider_mode;
3624   optab moptab;
3625   rtx tem;
3626   int size;
3627   bool speed = optimize_insn_for_speed_p ();
3628
3629   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3630
3631   wider_mode = GET_MODE_WIDER_MODE (mode);
3632   size = GET_MODE_BITSIZE (mode);
3633
3634   /* Firstly, try using a multiplication insn that only generates the needed
3635      high part of the product, and in the sign flavor of unsignedp.  */
3636   if (mul_highpart_cost (speed, mode) < max_cost)
3637     {
3638       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3639       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3640                           unsignedp, OPTAB_DIRECT);
3641       if (tem)
3642         return tem;
3643     }
3644
3645   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3646      Need to adjust the result after the multiplication.  */
3647   if (size - 1 < BITS_PER_WORD
3648       && (mul_highpart_cost (speed, mode)
3649           + 2 * shift_cost (speed, mode, size-1)
3650           + 4 * add_cost (speed, mode) < max_cost))
3651     {
3652       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3653       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3654                           unsignedp, OPTAB_DIRECT);
3655       if (tem)
3656         /* We used the wrong signedness.  Adjust the result.  */
3657         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3658                                             tem, unsignedp);
3659     }
3660
3661   /* Try widening multiplication.  */
3662   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3663   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3664       && mul_widen_cost (speed, wider_mode) < max_cost)
3665     {
3666       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3667                           unsignedp, OPTAB_WIDEN);
3668       if (tem)
3669         return extract_high_half (mode, tem);
3670     }
3671
3672   /* Try widening the mode and perform a non-widening multiplication.  */
3673   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3674       && size - 1 < BITS_PER_WORD
3675       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3676           < max_cost))
3677     {
3678       rtx_insn *insns;
3679       rtx wop0, wop1;
3680
3681       /* We need to widen the operands, for example to ensure the
3682          constant multiplier is correctly sign or zero extended.
3683          Use a sequence to clean-up any instructions emitted by
3684          the conversions if things don't work out.  */
3685       start_sequence ();
3686       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3687       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3688       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3689                           unsignedp, OPTAB_WIDEN);
3690       insns = get_insns ();
3691       end_sequence ();
3692
3693       if (tem)
3694         {
3695           emit_insn (insns);
3696           return extract_high_half (mode, tem);
3697         }
3698     }
3699
3700   /* Try widening multiplication of opposite signedness, and adjust.  */
3701   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3702   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3703       && size - 1 < BITS_PER_WORD
3704       && (mul_widen_cost (speed, wider_mode)
3705           + 2 * shift_cost (speed, mode, size-1)
3706           + 4 * add_cost (speed, mode) < max_cost))
3707     {
3708       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3709                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3710       if (tem != 0)
3711         {
3712           tem = extract_high_half (mode, tem);
3713           /* We used the wrong signedness.  Adjust the result.  */
3714           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3715                                               target, unsignedp);
3716         }
3717     }
3718
3719   return 0;
3720 }
3721
3722 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3723    putting the high half of the result in TARGET if that is convenient,
3724    and return where the result is.  If the operation can not be performed,
3725    0 is returned.
3726
3727    MODE is the mode of operation and result.
3728
3729    UNSIGNEDP nonzero means unsigned multiply.
3730
3731    MAX_COST is the total allowed cost for the expanded RTL.  */
3732
3733 static rtx
3734 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3735                       rtx target, int unsignedp, int max_cost)
3736 {
3737   machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3738   unsigned HOST_WIDE_INT cnst1;
3739   int extra_cost;
3740   bool sign_adjust = false;
3741   enum mult_variant variant;
3742   struct algorithm alg;
3743   rtx tem;
3744   bool speed = optimize_insn_for_speed_p ();
3745
3746   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3747   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3748   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3749
3750   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3751
3752   /* We can't optimize modes wider than BITS_PER_WORD.
3753      ??? We might be able to perform double-word arithmetic if
3754      mode == word_mode, however all the cost calculations in
3755      synth_mult etc. assume single-word operations.  */
3756   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3757     return expmed_mult_highpart_optab (mode, op0, op1, target,
3758                                        unsignedp, max_cost);
3759
3760   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3761
3762   /* Check whether we try to multiply by a negative constant.  */
3763   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3764     {
3765       sign_adjust = true;
3766       extra_cost += add_cost (speed, mode);
3767     }
3768
3769   /* See whether shift/add multiplication is cheap enough.  */
3770   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3771                            max_cost - extra_cost))
3772     {
3773       /* See whether the specialized multiplication optabs are
3774          cheaper than the shift/add version.  */
3775       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3776                                         alg.cost.cost + extra_cost);
3777       if (tem)
3778         return tem;
3779
3780       tem = convert_to_mode (wider_mode, op0, unsignedp);
3781       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3782       tem = extract_high_half (mode, tem);
3783
3784       /* Adjust result for signedness.  */
3785       if (sign_adjust)
3786         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3787
3788       return tem;
3789     }
3790   return expmed_mult_highpart_optab (mode, op0, op1, target,
3791                                      unsignedp, max_cost);
3792 }
3793
3794
3795 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3796
3797 static rtx
3798 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3799 {
3800   rtx result, temp, shift;
3801   rtx_code_label *label;
3802   int logd;
3803   int prec = GET_MODE_PRECISION (mode);
3804
3805   logd = floor_log2 (d);
3806   result = gen_reg_rtx (mode);
3807
3808   /* Avoid conditional branches when they're expensive.  */
3809   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3810       && optimize_insn_for_speed_p ())
3811     {
3812       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3813                                       mode, 0, -1);
3814       if (signmask)
3815         {
3816           HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3817           signmask = force_reg (mode, signmask);
3818           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3819
3820           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3821              which instruction sequence to use.  If logical right shifts
3822              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3823              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3824
3825           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3826           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3827               || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3828                   > COSTS_N_INSNS (2)))
3829             {
3830               temp = expand_binop (mode, xor_optab, op0, signmask,
3831                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3832               temp = expand_binop (mode, sub_optab, temp, signmask,
3833                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3834               temp = expand_binop (mode, and_optab, temp,
3835                                    gen_int_mode (masklow, mode),
3836                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3837               temp = expand_binop (mode, xor_optab, temp, signmask,
3838                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3839               temp = expand_binop (mode, sub_optab, temp, signmask,
3840                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3841             }
3842           else
3843             {
3844               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3845                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3846               signmask = force_reg (mode, signmask);
3847
3848               temp = expand_binop (mode, add_optab, op0, signmask,
3849                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3850               temp = expand_binop (mode, and_optab, temp,
3851                                    gen_int_mode (masklow, mode),
3852                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3853               temp = expand_binop (mode, sub_optab, temp, signmask,
3854                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3855             }
3856           return temp;
3857         }
3858     }
3859
3860   /* Mask contains the mode's signbit and the significant bits of the
3861      modulus.  By including the signbit in the operation, many targets
3862      can avoid an explicit compare operation in the following comparison
3863      against zero.  */
3864   wide_int mask = wi::mask (logd, false, prec);
3865   mask = wi::set_bit (mask, prec - 1);
3866
3867   temp = expand_binop (mode, and_optab, op0,
3868                        immed_wide_int_const (mask, mode),
3869                        result, 1, OPTAB_LIB_WIDEN);
3870   if (temp != result)
3871     emit_move_insn (result, temp);
3872
3873   label = gen_label_rtx ();
3874   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3875
3876   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3877                        0, OPTAB_LIB_WIDEN);
3878
3879   mask = wi::mask (logd, true, prec);
3880   temp = expand_binop (mode, ior_optab, temp,
3881                        immed_wide_int_const (mask, mode),
3882                        result, 1, OPTAB_LIB_WIDEN);
3883   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3884                        0, OPTAB_LIB_WIDEN);
3885   if (temp != result)
3886     emit_move_insn (result, temp);
3887   emit_label (label);
3888   return result;
3889 }
3890
3891 /* Expand signed division of OP0 by a power of two D in mode MODE.
3892    This routine is only called for positive values of D.  To make the
3893    quotient round towards zero, D - 1 is added to a negative OP0 before
3894    the arithmetic right shift; the cases below vary how that is done.  */
3895
3896 static rtx
3897 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3898 {
3899   rtx temp;
3900   int logd = floor_log2 (d);
3901
3902   if (d == 2 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 1)
3903     {
3904       temp = gen_reg_rtx (mode);        /* Target for OP0 < 0 ? 1 : 0.  */
3905       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3906       if (temp != NULL_RTX)     /* The store-flag may fail; fall through.  */
3907         {
3908           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3909                                0, OPTAB_LIB_WIDEN);
3910           return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3911         }
3912     }
3913
3914   if (HAVE_conditional_move
3915       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3916     {
3917       start_sequence ();        /* Discarded below if the cmove fails.  */
3918       rtx temp2 = copy_to_mode_reg (mode, op0);
3919       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3920                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3921       temp = force_reg (mode, temp);
3922       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3923       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3924                                      mode, temp, temp2, mode, 0);
3925       if (temp2)
3926         {
3927           rtx_insn *seq = get_insns ();
3928           end_sequence ();
3929           emit_insn (seq);
3930           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3931         }
3932       end_sequence ();
3933     }
3934
3935   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3936     {
3937       int ushift = GET_MODE_BITSIZE (mode) - logd;
3938
3939       temp = gen_reg_rtx (mode);        /* Target for OP0 < 0 ? -1 : 0.  */
3940       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3941       if (temp != NULL_RTX)     /* The store-flag may fail; fall through.  */
3942         {
3943           if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3944               || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3945                  > COSTS_N_INSNS (1))
3946             temp = expand_binop (mode, and_optab, temp,
3947                                  gen_int_mode (d - 1, mode),
3948                                  NULL_RTX, 0, OPTAB_LIB_WIDEN);
3949           else                  /* -1 >> USHIFT, unsigned, is D - 1 too.  */
3950             temp = expand_shift (RSHIFT_EXPR, mode, temp, ushift, NULL_RTX, 1);
3951           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3952                                0, OPTAB_LIB_WIDEN);
3953           return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3954         }
3955     }
3956
3957   /* Last resort: branch around the D - 1 adjustment when OP0 >= 0.  */
3958   rtx_code_label *label = gen_label_rtx ();
3959   temp = copy_to_mode_reg (mode, op0);
3960   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3961   expand_inc (temp, gen_int_mode (d - 1, mode));
3962   emit_label (label);
3963   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3964 }
3965 \f
3966 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3967    if that is convenient, and returning where the result is.
3968    You may request either the quotient or the remainder as the result;
3969    specify REM_FLAG nonzero to get the remainder.
3970
3971    CODE is the expression code for which kind of division this is;
3972    it controls how rounding is done.  MODE is the machine mode to use.
3973    UNSIGNEDP nonzero means do unsigned division.  */
3974
3975 /* ??? For CEIL_MOD_EXPR, we could compute a provisional remainder with
3976    ANDI and then correct it by or'ing in the missing high bits
3977    if the result of ANDI is nonzero.
3978    For ROUND_MOD_EXPR, we could use ANDI and then sign-extend the result.
3979    This could optimize to a bfexts instruction.
3980    But C doesn't use these operations, so their optimizations are
3981    left for later.  */
3982 /* ??? For modulo, we don't actually need the highpart of the first product,
3983    the low part will do nicely.  And for small divisors, the second multiply
3984    can also be a low-part only multiply or even be completely left out.
3985    E.g. to calculate the remainder of a division by 3 with a 32 bit
3986    multiply, multiply with 0x55555556 and extract the upper two bits;
3987    the result is exact for inputs up to 0x1fffffff.
3988    The input range can be reduced by using cross-sum rules.
3989    For odd divisors >= 3, the following table gives right shift counts
3990    so that if a number is shifted by an integer multiple of the given
3991    amount, the remainder stays the same:
3992    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3993    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3994    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3995    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3996    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3997
3998    Cross-sum rules for even numbers can be derived by leaving as many bits
3999    to the right alone as the divisor has zeros to the right.
4000    E.g. if x is an unsigned 32 bit number:
4001    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4002    */
4003
4004 rtx
4005 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4006                rtx op0, rtx op1, rtx target, int unsignedp)
4007 {
4008   machine_mode compute_mode;
4009   rtx tquotient;
4010   rtx quotient = 0, remainder = 0;
4011   rtx_insn *last;
4012   int size;
4013   rtx_insn *insn;
4014   optab optab1, optab2;
4015   int op1_is_constant, op1_is_pow2 = 0;
4016   int max_cost, extra_cost;
4017   static HOST_WIDE_INT last_div_const = 0;
4018   bool speed = optimize_insn_for_speed_p ();
4019
4020   op1_is_constant = CONST_INT_P (op1);
4021   if (op1_is_constant)
4022     {
4023       unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
4024       if (unsignedp)
4025         ext_op1 &= GET_MODE_MASK (mode);
4026       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
4027                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
4028     }
4029
4030   /*
4031      This is the structure of expand_divmod:
4032
4033      First comes code to fix up the operands so we can perform the operations
4034      correctly and efficiently.
4035
4036      Second comes a switch statement with code specific for each rounding mode.
4037      For some special operands this code emits all RTL for the desired
4038      operation, for other cases, it generates only a quotient and stores it in
4039      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4040      to indicate that it has not done anything.
4041
4042      Last comes code that finishes the operation.  If QUOTIENT is set and
4043      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4044      QUOTIENT is not set, it is computed using trunc rounding.
4045
4046      We try to generate special code for division and remainder when OP1 is a
4047      constant.  If |OP1| = 2**n we can use shifts and some other fast
4048      operations.  For other values of OP1, we compute a carefully selected
4049      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4050      by m.
4051
4052      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4053      half of the product.  Different strategies for generating the product are
4054      implemented in expmed_mult_highpart.
4055
4056      If what we actually want is the remainder, we generate that by another
4057      by-constant multiplication and a subtraction.  */
4058
4059   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4060      code below will malfunction if we are, so check here and handle
4061      the special case if so.  */
4062   if (op1 == const1_rtx)
4063     return rem_flag ? const0_rtx : op0;
4064
4065     /* When dividing by -1, we could get an overflow.
4066      negv_optab can handle overflows.  */
4067   if (! unsignedp && op1 == constm1_rtx)
4068     {
4069       if (rem_flag)
4070         return const0_rtx;
4071       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4072                           ? negv_optab : neg_optab, op0, target, 0);
4073     }
4074
4075   if (target
4076       /* Don't use the function value register as a target
4077          since we have to read it as well as write it,
4078          and function-inlining gets confused by this.  */
4079       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4080           /* Don't clobber an operand while doing a multi-step calculation.  */
4081           || ((rem_flag || op1_is_constant)
4082               && (reg_mentioned_p (target, op0)
4083                   || (MEM_P (op0) && MEM_P (target))))
4084           || reg_mentioned_p (target, op1)
4085           || (MEM_P (op1) && MEM_P (target))))
4086     target = 0;
4087
4088   /* Get the mode in which to perform this computation.  Normally it will
4089      be MODE, but sometimes we can't do the desired operation in MODE.
4090      If so, pick a wider mode in which we can do the operation.  Convert
4091      to that mode at the start to avoid repeated conversions.
4092
4093      First see what operations we need.  These depend on the expression
4094      we are evaluating.  (We assume that divxx3 insns exist under the
4095      same conditions that modxx3 insns and that these insns don't normally
4096      fail.  If these assumptions are not correct, we may generate less
4097      efficient code in some cases.)
4098
4099      Then see if we find a mode in which we can open-code that operation
4100      (either a division, modulus, or shift).  Finally, check for the smallest
4101      mode for which we can do the operation with a library call.  */
4102
4103   /* We might want to refine this now that we have division-by-constant
4104      optimization.  Since expmed_mult_highpart tries so many variants, it is
4105      not straightforward to generalize this.  Maybe we should make an array
4106      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4107
4108   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
4109             ? (unsignedp ? lshr_optab : ashr_optab)
4110             : (unsignedp ? udiv_optab : sdiv_optab));
4111   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
4112             ? optab1
4113             : (unsignedp ? udivmod_optab : sdivmod_optab));
4114
4115   for (compute_mode = mode; compute_mode != VOIDmode;
4116        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4117     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4118         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4119       break;
4120
4121   if (compute_mode == VOIDmode)
4122     for (compute_mode = mode; compute_mode != VOIDmode;
4123          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4124       if (optab_libfunc (optab1, compute_mode)
4125           || optab_libfunc (optab2, compute_mode))
4126         break;
4127
4128   /* If we still couldn't find a mode, use MODE, but expand_binop will
4129      probably die.  */
4130   if (compute_mode == VOIDmode)
4131     compute_mode = mode;
4132
4133   if (target && GET_MODE (target) == compute_mode)
4134     tquotient = target;
4135   else
4136     tquotient = gen_reg_rtx (compute_mode);
4137
4138   size = GET_MODE_BITSIZE (compute_mode);
4139 #if 0
4140   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4141      (mode), and thereby get better code when OP1 is a constant.  Do that
4142      later.  It will require going over all usages of SIZE below.  */
4143   size = GET_MODE_BITSIZE (mode);
4144 #endif
4145
4146   /* Only deduct something for a REM if the last divide done was
4147      for a different constant.   Then set the constant of the last
4148      divide.  */
4149   max_cost = (unsignedp
4150               ? udiv_cost (speed, compute_mode)
4151               : sdiv_cost (speed, compute_mode));
4152   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4153                      && INTVAL (op1) == last_div_const))
4154     max_cost -= (mul_cost (speed, compute_mode)
4155                  + add_cost (speed, compute_mode));
4156
4157   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4158
4159   /* Now convert to the best mode to use.  */
4160   if (compute_mode != mode)
4161     {
4162       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4163       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4164
4165       /* convert_modes may have placed op1 into a register, so we
4166          must recompute the following.  */
4167       op1_is_constant = CONST_INT_P (op1);
4168       op1_is_pow2 = (op1_is_constant
4169                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4170                           || (! unsignedp
4171                               && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4172     }
4173
4174   /* If one of the operands is a volatile MEM, copy it into a register.  */
4175
4176   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4177     op0 = force_reg (compute_mode, op0);
4178   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4179     op1 = force_reg (compute_mode, op1);
4180
4181   /* If we need the remainder or if OP1 is constant, we need to
4182      put OP0 in a register in case it has any queued subexpressions.  */
4183   if (rem_flag || op1_is_constant)
4184     op0 = force_reg (compute_mode, op0);
4185
4186   last = get_last_insn ();
4187
4188   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4189   if (unsignedp)
4190     {
4191       if (code == FLOOR_DIV_EXPR)
4192         code = TRUNC_DIV_EXPR;
4193       if (code == FLOOR_MOD_EXPR)
4194         code = TRUNC_MOD_EXPR;
4195       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4196         code = TRUNC_DIV_EXPR;
4197     }
4198
4199   if (op1 != const0_rtx)
4200     switch (code)
4201       {
4202       case TRUNC_MOD_EXPR:
4203       case TRUNC_DIV_EXPR:
4204         if (op1_is_constant)
4205           {
4206             if (unsignedp)
4207               {
4208                 unsigned HOST_WIDE_INT mh, ml;
4209                 int pre_shift, post_shift;
4210                 int dummy;
4211                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4212                                             & GET_MODE_MASK (compute_mode));
4213
4214                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4215                   {
4216                     pre_shift = floor_log2 (d);
4217                     if (rem_flag)
4218                       {
4219                         unsigned HOST_WIDE_INT mask
4220                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4221                         remainder
4222                           = expand_binop (compute_mode, and_optab, op0,
4223                                           gen_int_mode (mask, compute_mode),
4224                                           remainder, 1,
4225                                           OPTAB_LIB_WIDEN);
4226                         if (remainder)
4227                           return gen_lowpart (mode, remainder);
4228                       }
4229                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4230                                              pre_shift, tquotient, 1);
4231                   }
4232                 else if (size <= HOST_BITS_PER_WIDE_INT)
4233                   {
4234                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4235                       {
4236                         /* Most significant bit of divisor is set; emit an scc
4237                            insn.  */
4238                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4239                                                           compute_mode, 1, 1);
4240                       }
4241                     else
4242                       {
4243                         /* Find a suitable multiplier and right shift count
4244                            instead of multiplying with D.  */
4245
4246                         mh = choose_multiplier (d, size, size,
4247                                                 &ml, &post_shift, &dummy);
4248
4249                         /* If the suggested multiplier is more than SIZE bits,
4250                            we can do better for even divisors, using an
4251                            initial right shift.  */
4252                         if (mh != 0 && (d & 1) == 0)
4253                           {
4254                             pre_shift = floor_log2 (d & -d);
4255                             mh = choose_multiplier (d >> pre_shift, size,
4256                                                     size - pre_shift,
4257                                                     &ml, &post_shift, &dummy);
4258                             gcc_assert (!mh);
4259                           }
4260                         else
4261                           pre_shift = 0;
4262
4263                         if (mh != 0)
4264                           {
4265                             rtx t1, t2, t3, t4;
4266
4267                             if (post_shift - 1 >= BITS_PER_WORD)
4268                               goto fail1;
4269
4270                             extra_cost
4271                               = (shift_cost (speed, compute_mode, post_shift - 1)
4272                                  + shift_cost (speed, compute_mode, 1)
4273                                  + 2 * add_cost (speed, compute_mode));
4274                             t1 = expmed_mult_highpart
4275                               (compute_mode, op0,
4276                                gen_int_mode (ml, compute_mode),
4277                                NULL_RTX, 1, max_cost - extra_cost);
4278                             if (t1 == 0)
4279                               goto fail1;
4280                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4281                                                                op0, t1),
4282                                                 NULL_RTX);
4283                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4284                                                t2, 1, NULL_RTX, 1);
4285                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4286                                                               t1, t3),
4287                                                 NULL_RTX);
4288                             quotient = expand_shift
4289                               (RSHIFT_EXPR, compute_mode, t4,
4290                                post_shift - 1, tquotient, 1);
4291                           }
4292                         else
4293                           {
4294                             rtx t1, t2;
4295
4296                             if (pre_shift >= BITS_PER_WORD
4297                                 || post_shift >= BITS_PER_WORD)
4298                               goto fail1;
4299
4300                             t1 = expand_shift
4301                               (RSHIFT_EXPR, compute_mode, op0,
4302                                pre_shift, NULL_RTX, 1);
4303                             extra_cost
4304                               = (shift_cost (speed, compute_mode, pre_shift)
4305                                  + shift_cost (speed, compute_mode, post_shift));
4306                             t2 = expmed_mult_highpart
4307                               (compute_mode, t1,
4308                                gen_int_mode (ml, compute_mode),
4309                                NULL_RTX, 1, max_cost - extra_cost);
4310                             if (t2 == 0)
4311                               goto fail1;
4312                             quotient = expand_shift
4313                               (RSHIFT_EXPR, compute_mode, t2,
4314                                post_shift, tquotient, 1);
4315                           }
4316                       }
4317                   }
4318                 else            /* Too wide mode to use tricky code */
4319                   break;
4320
4321                 insn = get_last_insn ();
4322                 if (insn != last)
4323                   set_dst_reg_note (insn, REG_EQUAL,
4324                                     gen_rtx_UDIV (compute_mode, op0, op1),
4325                                     quotient);
4326               }
4327             else                /* TRUNC_DIV, signed */
4328               {
4329                 unsigned HOST_WIDE_INT ml;
4330                 int lgup, post_shift;
4331                 rtx mlr;
4332                 HOST_WIDE_INT d = INTVAL (op1);
4333                 unsigned HOST_WIDE_INT abs_d;
4334
4335                 /* Since d might be INT_MIN, we have to cast to
4336                    unsigned HOST_WIDE_INT before negating to avoid
4337                    undefined signed overflow.  */
4338                 abs_d = (d >= 0
4339                          ? (unsigned HOST_WIDE_INT) d
4340                          : - (unsigned HOST_WIDE_INT) d);
4341
4342                 /* n rem d = n rem -d */
4343                 if (rem_flag && d < 0)
4344                   {
4345                     d = abs_d;
4346                     op1 = gen_int_mode (abs_d, compute_mode);
4347                   }
4348
4349                 if (d == 1)
4350                   quotient = op0;
4351                 else if (d == -1)
4352                   quotient = expand_unop (compute_mode, neg_optab, op0,
4353                                           tquotient, 0);
4354                 else if (HOST_BITS_PER_WIDE_INT >= size
4355                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4356                   {
4357                     /* This case is not handled correctly below.  */
4358                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4359                                                 compute_mode, 1, 1);
4360                     if (quotient == 0)
4361                       goto fail1;
4362                   }
4363                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4364                          && (rem_flag
4365                              ? smod_pow2_cheap (speed, compute_mode)
4366                              : sdiv_pow2_cheap (speed, compute_mode))
4367                          /* We assume that cheap metric is true if the
4368                             optab has an expander for this mode.  */
4369                          && ((optab_handler ((rem_flag ? smod_optab
4370                                               : sdiv_optab),
4371                                              compute_mode)
4372                               != CODE_FOR_nothing)
4373                              || (optab_handler (sdivmod_optab,
4374                                                 compute_mode)
4375                                  != CODE_FOR_nothing)))
4376                   ;
4377                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4378                   {
4379                     if (rem_flag)
4380                       {
4381                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4382                         if (remainder)
4383                           return gen_lowpart (mode, remainder);
4384                       }
4385
4386                     if (sdiv_pow2_cheap (speed, compute_mode)
4387                         && ((optab_handler (sdiv_optab, compute_mode)
4388                              != CODE_FOR_nothing)
4389                             || (optab_handler (sdivmod_optab, compute_mode)
4390                                 != CODE_FOR_nothing)))
4391                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4392                                                 compute_mode, op0,
4393                                                 gen_int_mode (abs_d,
4394                                                               compute_mode),
4395                                                 NULL_RTX, 0);
4396                     else
4397                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4398
4399                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4400                        negate the quotient.  */
4401                     if (d < 0)
4402                       {
4403                         insn = get_last_insn ();
4404                         if (insn != last
4405                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4406                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4407                           set_dst_reg_note (insn, REG_EQUAL,
4408                                             gen_rtx_DIV (compute_mode, op0,
4409                                                          gen_int_mode
4410                                                            (abs_d,
4411                                                             compute_mode)),
4412                                             quotient);
4413
4414                         quotient = expand_unop (compute_mode, neg_optab,
4415                                                 quotient, quotient, 0);
4416                       }
4417                   }
4418                 else if (size <= HOST_BITS_PER_WIDE_INT)
4419                   {
4420                     choose_multiplier (abs_d, size, size - 1,
4421                                        &ml, &post_shift, &lgup);
4422                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4423                       {
4424                         rtx t1, t2, t3;
4425
4426                         if (post_shift >= BITS_PER_WORD
4427                             || size - 1 >= BITS_PER_WORD)
4428                           goto fail1;
4429
4430                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4431                                       + shift_cost (speed, compute_mode, size - 1)
4432                                       + add_cost (speed, compute_mode));
4433                         t1 = expmed_mult_highpart
4434                           (compute_mode, op0, gen_int_mode (ml, compute_mode),
4435                            NULL_RTX, 0, max_cost - extra_cost);
4436                         if (t1 == 0)
4437                           goto fail1;
4438                         t2 = expand_shift
4439                           (RSHIFT_EXPR, compute_mode, t1,
4440                            post_shift, NULL_RTX, 0);
4441                         t3 = expand_shift
4442                           (RSHIFT_EXPR, compute_mode, op0,
4443                            size - 1, NULL_RTX, 0);
4444                         if (d < 0)
4445                           quotient
4446                             = force_operand (gen_rtx_MINUS (compute_mode,
4447                                                             t3, t2),
4448                                              tquotient);
4449                         else
4450                           quotient
4451                             = force_operand (gen_rtx_MINUS (compute_mode,
4452                                                             t2, t3),
4453                                              tquotient);
4454                       }
4455                     else
4456                       {
4457                         rtx t1, t2, t3, t4;
4458
4459                         if (post_shift >= BITS_PER_WORD
4460                             || size - 1 >= BITS_PER_WORD)
4461                           goto fail1;
4462
4463                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4464                         mlr = gen_int_mode (ml, compute_mode);
4465                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4466                                       + shift_cost (speed, compute_mode, size - 1)
4467                                       + 2 * add_cost (speed, compute_mode));
4468                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4469                                                    NULL_RTX, 0,
4470                                                    max_cost - extra_cost);
4471                         if (t1 == 0)
4472                           goto fail1;
4473                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4474                                                           t1, op0),
4475                                             NULL_RTX);
4476                         t3 = expand_shift
4477                           (RSHIFT_EXPR, compute_mode, t2,
4478                            post_shift, NULL_RTX, 0);
4479                         t4 = expand_shift
4480                           (RSHIFT_EXPR, compute_mode, op0,
4481                            size - 1, NULL_RTX, 0);
4482                         if (d < 0)
4483                           quotient
4484                             = force_operand (gen_rtx_MINUS (compute_mode,
4485                                                             t4, t3),
4486                                              tquotient);
4487                         else
4488                           quotient
4489                             = force_operand (gen_rtx_MINUS (compute_mode,
4490                                                             t3, t4),
4491                                              tquotient);
4492                       }
4493                   }
4494                 else            /* Too wide mode to use tricky code */
4495                   break;
4496
4497                 insn = get_last_insn ();
4498                 if (insn != last)
4499                   set_dst_reg_note (insn, REG_EQUAL,
4500                                     gen_rtx_DIV (compute_mode, op0, op1),
4501                                     quotient);
4502               }
4503             break;
4504           }
4505       fail1:
4506         delete_insns_since (last);
4507         break;
4508
4509       case FLOOR_DIV_EXPR:
4510       case FLOOR_MOD_EXPR:
4511       /* We will come here only for signed operations.  */
4512         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4513           {
4514             unsigned HOST_WIDE_INT mh, ml;
4515             int pre_shift, lgup, post_shift;
4516             HOST_WIDE_INT d = INTVAL (op1);
4517
4518             if (d > 0)
4519               {
4520                 /* We could just as easily deal with negative constants here,
4521                    but it does not seem worth the trouble for GCC 2.6.  */
4522                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4523                   {
4524                     pre_shift = floor_log2 (d);
4525                     if (rem_flag)
4526                       {
4527                         unsigned HOST_WIDE_INT mask
4528                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4529                         remainder = expand_binop
4530                           (compute_mode, and_optab, op0,
4531                            gen_int_mode (mask, compute_mode),
4532                            remainder, 0, OPTAB_LIB_WIDEN);
4533                         if (remainder)
4534                           return gen_lowpart (mode, remainder);
4535                       }
4536                     quotient = expand_shift
4537                       (RSHIFT_EXPR, compute_mode, op0,
4538                        pre_shift, tquotient, 0);
4539                   }
4540                 else
4541                   {
4542                     rtx t1, t2, t3, t4;
4543
4544                     mh = choose_multiplier (d, size, size - 1,
4545                                             &ml, &post_shift, &lgup);
4546                     gcc_assert (!mh);
4547
4548                     if (post_shift < BITS_PER_WORD
4549                         && size - 1 < BITS_PER_WORD)
4550                       {
4551                         t1 = expand_shift
4552                           (RSHIFT_EXPR, compute_mode, op0,
4553                            size - 1, NULL_RTX, 0);
4554                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4555                                            NULL_RTX, 0, OPTAB_WIDEN);
4556                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4557                                       + shift_cost (speed, compute_mode, size - 1)
4558                                       + 2 * add_cost (speed, compute_mode));
4559                         t3 = expmed_mult_highpart
4560                           (compute_mode, t2, gen_int_mode (ml, compute_mode),
4561                            NULL_RTX, 1, max_cost - extra_cost);
4562                         if (t3 != 0)
4563                           {
4564                             t4 = expand_shift
4565                               (RSHIFT_EXPR, compute_mode, t3,
4566                                post_shift, NULL_RTX, 1);
4567                             quotient = expand_binop (compute_mode, xor_optab,
4568                                                      t4, t1, tquotient, 0,
4569                                                      OPTAB_WIDEN);
4570                           }
4571                       }
4572                   }
4573               }
4574             else
4575               {
4576                 rtx nsign, t1, t2, t3, t4;
4577                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4578                                                   op0, constm1_rtx), NULL_RTX);
4579                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4580                                    0, OPTAB_WIDEN);
4581                 nsign = expand_shift
4582                   (RSHIFT_EXPR, compute_mode, t2,
4583                    size - 1, NULL_RTX, 0);
4584                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4585                                     NULL_RTX);
4586                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4587                                     NULL_RTX, 0);
4588                 if (t4)
4589                   {
4590                     rtx t5;
4591                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4592                                       NULL_RTX, 0);
4593                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4594                                                             t4, t5),
4595                                               tquotient);
4596                   }
4597               }
4598           }
4599
4600         if (quotient != 0)
4601           break;
4602         delete_insns_since (last);
4603
4604         /* Try using an instruction that produces both the quotient and
4605            remainder, using truncation.  We can easily compensate the quotient
4606            or remainder to get floor rounding, once we have the remainder.
4607            Notice that we compute also the final remainder value here,
4608            and return the result right away.  */
4609         if (target == 0 || GET_MODE (target) != compute_mode)
4610           target = gen_reg_rtx (compute_mode);
4611
4612         if (rem_flag)
4613           {
4614             remainder
4615               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4616             quotient = gen_reg_rtx (compute_mode);
4617           }
4618         else
4619           {
4620             quotient
4621               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4622             remainder = gen_reg_rtx (compute_mode);
4623           }
4624
4625         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4626                                  quotient, remainder, 0))
4627           {
4628             /* This could be computed with a branch-less sequence.
4629                Save that for later.  */
4630             rtx tem;
4631             rtx_code_label *label = gen_label_rtx ();
4632             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4633             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4634                                 NULL_RTX, 0, OPTAB_WIDEN);
4635             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4636             expand_dec (quotient, const1_rtx);
4637             expand_inc (remainder, op1);
4638             emit_label (label);
4639             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4640           }
4641
4642         /* No luck with division elimination or divmod.  Have to do it
4643            by conditionally adjusting op0 *and* the result.  */
4644         {
4645           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4646           rtx adjusted_op0;
4647           rtx tem;
4648
4649           quotient = gen_reg_rtx (compute_mode);
4650           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4651           label1 = gen_label_rtx ();
4652           label2 = gen_label_rtx ();
4653           label3 = gen_label_rtx ();
4654           label4 = gen_label_rtx ();
4655           label5 = gen_label_rtx ();
4656           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4657           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4658           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4659                               quotient, 0, OPTAB_LIB_WIDEN);
4660           if (tem != quotient)
4661             emit_move_insn (quotient, tem);
4662           emit_jump_insn (targetm.gen_jump (label5));
4663           emit_barrier ();
4664           emit_label (label1);
4665           expand_inc (adjusted_op0, const1_rtx);
4666           emit_jump_insn (targetm.gen_jump (label4));
4667           emit_barrier ();
4668           emit_label (label2);
4669           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4670           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4671                               quotient, 0, OPTAB_LIB_WIDEN);
4672           if (tem != quotient)
4673             emit_move_insn (quotient, tem);
4674           emit_jump_insn (targetm.gen_jump (label5));
4675           emit_barrier ();
4676           emit_label (label3);
4677           expand_dec (adjusted_op0, const1_rtx);
4678           emit_label (label4);
4679           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4680                               quotient, 0, OPTAB_LIB_WIDEN);
4681           if (tem != quotient)
4682             emit_move_insn (quotient, tem);
4683           expand_dec (quotient, const1_rtx);
4684           emit_label (label5);
4685         }
4686         break;
4687
4688       case CEIL_DIV_EXPR:
4689       case CEIL_MOD_EXPR:
4690         if (unsignedp)
4691           {
4692             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4693               {
4694                 rtx t1, t2, t3;
4695                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4696                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4697                                    floor_log2 (d), tquotient, 1);
4698                 t2 = expand_binop (compute_mode, and_optab, op0,
4699                                    gen_int_mode (d - 1, compute_mode),
4700                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4701                 t3 = gen_reg_rtx (compute_mode);
4702                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4703                                       compute_mode, 1, 1);
4704                 if (t3 == 0)
4705                   {
4706                     rtx_code_label *lab;
4707                     lab = gen_label_rtx ();
4708                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4709                     expand_inc (t1, const1_rtx);
4710                     emit_label (lab);
4711                     quotient = t1;
4712                   }
4713                 else
4714                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4715                                                           t1, t3),
4716                                             tquotient);
4717                 break;
4718               }
4719
4720             /* Try using an instruction that produces both the quotient and
4721                remainder, using truncation.  We can easily compensate the
4722                quotient or remainder to get ceiling rounding, once we have the
4723                remainder.  Notice that we compute also the final remainder
4724                value here, and return the result right away.  */
4725             if (target == 0 || GET_MODE (target) != compute_mode)
4726               target = gen_reg_rtx (compute_mode);
4727
4728             if (rem_flag)
4729               {
4730                 remainder = (REG_P (target)
4731                              ? target : gen_reg_rtx (compute_mode));
4732                 quotient = gen_reg_rtx (compute_mode);
4733               }
4734             else
4735               {
4736                 quotient = (REG_P (target)
4737                             ? target : gen_reg_rtx (compute_mode));
4738                 remainder = gen_reg_rtx (compute_mode);
4739               }
4740
4741             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4742                                      remainder, 1))
4743               {
4744                 /* This could be computed with a branch-less sequence.
4745                    Save that for later.  */
4746                 rtx_code_label *label = gen_label_rtx ();
4747                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4748                                  compute_mode, label);
4749                 expand_inc (quotient, const1_rtx);
4750                 expand_dec (remainder, op1);
4751                 emit_label (label);
4752                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4753               }
4754
4755             /* No luck with division elimination or divmod.  Have to do it
4756                by conditionally adjusting op0 *and* the result.  */
4757             {
4758               rtx_code_label *label1, *label2;
4759               rtx adjusted_op0, tem;
4760
4761               quotient = gen_reg_rtx (compute_mode);
4762               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4763               label1 = gen_label_rtx ();
4764               label2 = gen_label_rtx ();
4765               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4766                                compute_mode, label1);
4767               emit_move_insn  (quotient, const0_rtx);
4768               emit_jump_insn (targetm.gen_jump (label2));
4769               emit_barrier ();
4770               emit_label (label1);
4771               expand_dec (adjusted_op0, const1_rtx);
4772               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4773                                   quotient, 1, OPTAB_LIB_WIDEN);
4774               if (tem != quotient)
4775                 emit_move_insn (quotient, tem);
4776               expand_inc (quotient, const1_rtx);
4777               emit_label (label2);
4778             }
4779           }
4780         else /* signed */
4781           {
4782             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4783                 && INTVAL (op1) >= 0)
4784               {
4785                 /* This is extremely similar to the code for the unsigned case
4786                    above.  For 2.7 we should merge these variants, but for
4787                    2.6.1 I don't want to touch the code for unsigned since that
4788                    get used in C.  The signed case will only be used by other
4789                    languages (Ada).  */
4790
4791                 rtx t1, t2, t3;
4792                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4793                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4794                                    floor_log2 (d), tquotient, 0);
4795                 t2 = expand_binop (compute_mode, and_optab, op0,
4796                                    gen_int_mode (d - 1, compute_mode),
4797                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4798                 t3 = gen_reg_rtx (compute_mode);
4799                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4800                                       compute_mode, 1, 1);
4801                 if (t3 == 0)
4802                   {
4803                     rtx_code_label *lab;
4804                     lab = gen_label_rtx ();
4805                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4806                     expand_inc (t1, const1_rtx);
4807                     emit_label (lab);
4808                     quotient = t1;
4809                   }
4810                 else
4811                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4812                                                           t1, t3),
4813                                             tquotient);
4814                 break;
4815               }
4816
4817             /* Try using an instruction that produces both the quotient and
4818                remainder, using truncation.  We can easily compensate the
4819                quotient or remainder to get ceiling rounding, once we have the
4820                remainder.  Notice that we compute also the final remainder
4821                value here, and return the result right away.  */
4822             if (target == 0 || GET_MODE (target) != compute_mode)
4823               target = gen_reg_rtx (compute_mode);
4824             if (rem_flag)
4825               {
4826                 remainder= (REG_P (target)
4827                             ? target : gen_reg_rtx (compute_mode));
4828                 quotient = gen_reg_rtx (compute_mode);
4829               }
4830             else
4831               {
4832                 quotient = (REG_P (target)
4833                             ? target : gen_reg_rtx (compute_mode));
4834                 remainder = gen_reg_rtx (compute_mode);
4835               }
4836
4837             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4838                                      remainder, 0))
4839               {
4840                 /* This could be computed with a branch-less sequence.
4841                    Save that for later.  */
4842                 rtx tem;
4843                 rtx_code_label *label = gen_label_rtx ();
4844                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4845                                  compute_mode, label);
4846                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4847                                     NULL_RTX, 0, OPTAB_WIDEN);
4848                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4849                 expand_inc (quotient, const1_rtx);
4850                 expand_dec (remainder, op1);
4851                 emit_label (label);
4852                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4853               }
4854
4855             /* No luck with division elimination or divmod.  Have to do it
4856                by conditionally adjusting op0 *and* the result.  */
4857             {
4858               rtx_code_label *label1, *label2, *label3, *label4, *label5;
4859               rtx adjusted_op0;
4860               rtx tem;
4861
4862               quotient = gen_reg_rtx (compute_mode);
4863               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4864               label1 = gen_label_rtx ();
4865               label2 = gen_label_rtx ();
4866               label3 = gen_label_rtx ();
4867               label4 = gen_label_rtx ();
4868               label5 = gen_label_rtx ();
4869               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4870               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4871                                compute_mode, label1);
4872               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4873                                   quotient, 0, OPTAB_LIB_WIDEN);
4874               if (tem != quotient)
4875                 emit_move_insn (quotient, tem);
4876               emit_jump_insn (targetm.gen_jump (label5));
4877               emit_barrier ();
4878               emit_label (label1);
4879               expand_dec (adjusted_op0, const1_rtx);
4880               emit_jump_insn (targetm.gen_jump (label4));
4881               emit_barrier ();
4882               emit_label (label2);
4883               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4884                                compute_mode, label3);
4885               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4886                                   quotient, 0, OPTAB_LIB_WIDEN);
4887               if (tem != quotient)
4888                 emit_move_insn (quotient, tem);
4889               emit_jump_insn (targetm.gen_jump (label5));
4890               emit_barrier ();
4891               emit_label (label3);
4892               expand_inc (adjusted_op0, const1_rtx);
4893               emit_label (label4);
4894               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4895                                   quotient, 0, OPTAB_LIB_WIDEN);
4896               if (tem != quotient)
4897                 emit_move_insn (quotient, tem);
4898               expand_inc (quotient, const1_rtx);
4899               emit_label (label5);
4900             }
4901           }
4902         break;
4903
4904       case EXACT_DIV_EXPR:
4905         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4906           {
4907             HOST_WIDE_INT d = INTVAL (op1);
4908             unsigned HOST_WIDE_INT ml;
4909             int pre_shift;
4910             rtx t1;
4911
4912             pre_shift = floor_log2 (d & -d);
4913             ml = invert_mod2n (d >> pre_shift, size);
4914             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4915                                pre_shift, NULL_RTX, unsignedp);
4916             quotient = expand_mult (compute_mode, t1,
4917                                     gen_int_mode (ml, compute_mode),
4918                                     NULL_RTX, 1);
4919
4920             insn = get_last_insn ();
4921             set_dst_reg_note (insn, REG_EQUAL,
4922                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4923                                               compute_mode, op0, op1),
4924                               quotient);
4925           }
4926         break;
4927
4928       case ROUND_DIV_EXPR:
4929       case ROUND_MOD_EXPR:
4930         if (unsignedp)
4931           {
4932             rtx tem;
4933             rtx_code_label *label;
4934             label = gen_label_rtx ();
4935             quotient = gen_reg_rtx (compute_mode);
4936             remainder = gen_reg_rtx (compute_mode);
4937             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4938               {
4939                 rtx tem;
4940                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4941                                          quotient, 1, OPTAB_LIB_WIDEN);
4942                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4943                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4944                                           remainder, 1, OPTAB_LIB_WIDEN);
4945               }
4946             tem = plus_constant (compute_mode, op1, -1);
4947             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4948             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4949             expand_inc (quotient, const1_rtx);
4950             expand_dec (remainder, op1);
4951             emit_label (label);
4952           }
4953         else
4954           {
4955             rtx abs_rem, abs_op1, tem, mask;
4956             rtx_code_label *label;
4957             label = gen_label_rtx ();
4958             quotient = gen_reg_rtx (compute_mode);
4959             remainder = gen_reg_rtx (compute_mode);
4960             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4961               {
4962                 rtx tem;
4963                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4964                                          quotient, 0, OPTAB_LIB_WIDEN);
4965                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4966                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4967                                           remainder, 0, OPTAB_LIB_WIDEN);
4968               }
4969             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4970             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4971             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4972                                 1, NULL_RTX, 1);
4973             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4974             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4975                                 NULL_RTX, 0, OPTAB_WIDEN);
4976             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4977                                  size - 1, NULL_RTX, 0);
4978             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4979                                 NULL_RTX, 0, OPTAB_WIDEN);
4980             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4981                                 NULL_RTX, 0, OPTAB_WIDEN);
4982             expand_inc (quotient, tem);
4983             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4984                                 NULL_RTX, 0, OPTAB_WIDEN);
4985             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4986                                 NULL_RTX, 0, OPTAB_WIDEN);
4987             expand_dec (remainder, tem);
4988             emit_label (label);
4989           }
4990         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4991
4992       default:
4993         gcc_unreachable ();
4994       }
4995
4996   if (quotient == 0)
4997     {
4998       if (target && GET_MODE (target) != compute_mode)
4999         target = 0;
5000
5001       if (rem_flag)
5002         {
5003           /* Try to produce the remainder without producing the quotient.
5004              If we seem to have a divmod pattern that does not require widening,
5005              don't try widening here.  We should really have a WIDEN argument
5006              to expand_twoval_binop, since what we'd really like to do here is
5007              1) try a mod insn in compute_mode
5008              2) try a divmod insn in compute_mode
5009              3) try a div insn in compute_mode and multiply-subtract to get
5010                 remainder
5011              4) try the same things with widening allowed.  */
5012           remainder
5013             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5014                                  op0, op1, target,
5015                                  unsignedp,
5016                                  ((optab_handler (optab2, compute_mode)
5017                                    != CODE_FOR_nothing)
5018                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
5019           if (remainder == 0)
5020             {
5021               /* No luck there.  Can we do remainder and divide at once
5022                  without a library call?  */
5023               remainder = gen_reg_rtx (compute_mode);
5024               if (! expand_twoval_binop ((unsignedp
5025                                           ? udivmod_optab
5026                                           : sdivmod_optab),
5027                                          op0, op1,
5028                                          NULL_RTX, remainder, unsignedp))
5029                 remainder = 0;
5030             }
5031
5032           if (remainder)
5033             return gen_lowpart (mode, remainder);
5034         }
5035
5036       /* Produce the quotient.  Try a quotient insn, but not a library call.
5037          If we have a divmod in this mode, use it in preference to widening
5038          the div (for this test we assume it will not fail). Note that optab2
5039          is set to the one of the two optabs that the call below will use.  */
5040       quotient
5041         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5042                              op0, op1, rem_flag ? NULL_RTX : target,
5043                              unsignedp,
5044                              ((optab_handler (optab2, compute_mode)
5045                                != CODE_FOR_nothing)
5046                               ? OPTAB_DIRECT : OPTAB_WIDEN));
5047
5048       if (quotient == 0)
5049         {
5050           /* No luck there.  Try a quotient-and-remainder insn,
5051              keeping the quotient alone.  */
5052           quotient = gen_reg_rtx (compute_mode);
5053           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5054                                      op0, op1,
5055                                      quotient, NULL_RTX, unsignedp))
5056             {
5057               quotient = 0;
5058               if (! rem_flag)
5059                 /* Still no luck.  If we are not computing the remainder,
5060                    use a library call for the quotient.  */
5061                 quotient = sign_expand_binop (compute_mode,
5062                                               udiv_optab, sdiv_optab,
5063                                               op0, op1, target,
5064                                               unsignedp, OPTAB_LIB_WIDEN);
5065             }
5066         }
5067     }
5068
5069   if (rem_flag)
5070     {
5071       if (target && GET_MODE (target) != compute_mode)
5072         target = 0;
5073
5074       if (quotient == 0)
5075         {
5076           /* No divide instruction either.  Use library for remainder.  */
5077           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5078                                          op0, op1, target,
5079                                          unsignedp, OPTAB_LIB_WIDEN);
5080           /* No remainder function.  Try a quotient-and-remainder
5081              function, keeping the remainder.  */
5082           if (!remainder)
5083             {
5084               remainder = gen_reg_rtx (compute_mode);
5085               if (!expand_twoval_binop_libfunc
5086                   (unsignedp ? udivmod_optab : sdivmod_optab,
5087                    op0, op1,
5088                    NULL_RTX, remainder,
5089                    unsignedp ? UMOD : MOD))
5090                 remainder = NULL_RTX;
5091             }
5092         }
5093       else
5094         {
5095           /* We divided.  Now finish doing X - Y * (X / Y).  */
5096           remainder = expand_mult (compute_mode, quotient, op1,
5097                                    NULL_RTX, unsignedp);
5098           remainder = expand_binop (compute_mode, sub_optab, op0,
5099                                     remainder, target, unsignedp,
5100                                     OPTAB_LIB_WIDEN);
5101         }
5102     }
5103
5104   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5105 }
5106 \f
5107 /* Return a tree node with data type TYPE, describing the value of X.
5108    Usually this is an VAR_DECL, if there is no obvious better choice.
5109    X may be an expression, however we only support those expressions
5110    generated by loop.c.  */
5111
5112 tree
5113 make_tree (tree type, rtx x)
5114 {
5115   tree t;
5116
5117   switch (GET_CODE (x))
5118     {
5119     case CONST_INT:
5120     case CONST_WIDE_INT:
5121       t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
5122       return t;
5123
5124     case CONST_DOUBLE:
5125       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5126       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5127         t = wide_int_to_tree (type,
5128                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5129                                                     HOST_BITS_PER_WIDE_INT * 2));
5130       else
5131         t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5132
5133       return t;
5134
5135     case CONST_VECTOR:
5136       {
5137         int units = CONST_VECTOR_NUNITS (x);
5138         tree itype = TREE_TYPE (type);
5139         tree *elts;
5140         int i;
5141
5142         /* Build a tree with vector elements.  */
5143         elts = XALLOCAVEC (tree, units);
5144         for (i = units - 1; i >= 0; --i)
5145           {
5146             rtx elt = CONST_VECTOR_ELT (x, i);
5147             elts[i] = make_tree (itype, elt);
5148           }
5149
5150         return build_vector (type, elts);
5151       }
5152
5153     case PLUS:
5154       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5155                           make_tree (type, XEXP (x, 1)));
5156
5157     case MINUS:
5158       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5159                           make_tree (type, XEXP (x, 1)));
5160
5161     case NEG:
5162       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5163
5164     case MULT:
5165       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5166                           make_tree (type, XEXP (x, 1)));
5167
5168     case ASHIFT:
5169       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5170                           make_tree (type, XEXP (x, 1)));
5171
5172     case LSHIFTRT:
5173       t = unsigned_type_for (type);
5174       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5175                                          make_tree (t, XEXP (x, 0)),
5176                                          make_tree (type, XEXP (x, 1))));
5177
5178     case ASHIFTRT:
5179       t = signed_type_for (type);
5180       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5181                                          make_tree (t, XEXP (x, 0)),
5182                                          make_tree (type, XEXP (x, 1))));
5183
5184     case DIV:
5185       if (TREE_CODE (type) != REAL_TYPE)
5186         t = signed_type_for (type);
5187       else
5188         t = type;
5189
5190       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5191                                          make_tree (t, XEXP (x, 0)),
5192                                          make_tree (t, XEXP (x, 1))));
5193     case UDIV:
5194       t = unsigned_type_for (type);
5195       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5196                                          make_tree (t, XEXP (x, 0)),
5197                                          make_tree (t, XEXP (x, 1))));
5198
5199     case SIGN_EXTEND:
5200     case ZERO_EXTEND:
5201       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5202                                           GET_CODE (x) == ZERO_EXTEND);
5203       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5204
5205     case CONST:
5206       return make_tree (type, XEXP (x, 0));
5207
5208     case SYMBOL_REF:
5209       t = SYMBOL_REF_DECL (x);
5210       if (t)
5211         return fold_convert (type, build_fold_addr_expr (t));
5212       /* else fall through.  */
5213
5214     default:
5215       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5216
5217       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5218          address mode to pointer mode.  */
5219       if (POINTER_TYPE_P (type))
5220         x = convert_memory_address_addr_space
5221               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5222
5223       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5224          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5225       t->decl_with_rtl.rtl = x;
5226
5227       return t;
5228     }
5229 }
5230 \f
5231 /* Emit rtl for the bitwise AND of OP0 and OP1 in MODE.  When neither
5232    operand carries a mode, simplification is attempted first.
5233
5234    A nonzero TARGET receives the value and is returned; when TARGET is 0
5235    the pseudo-register or constant that was produced is returned.  */
5236 rtx
5237 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5238 {
5239   rtx result = NULL_RTX;
5240
5241   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5242     result = simplify_binary_operation (AND, mode, op0, op1);
5243   if (!result)
5244     result = expand_binop (mode, and_optab, op0, op1, target, 0,
5245                            OPTAB_LIB_WIDEN);
5246   if (!target)
5247     return result;
5248   if (result != target)
5249     emit_move_insn (target, result);
5250   return target;
5251 }
5252 /* Helper function for emit_store_flag.  Try the cstore pattern ICODE on
5253    CODE (X, Y).  Return the flag rtx, or NULL_RTX after undoing its insns.  */
5254 rtx
5255 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5256              machine_mode mode, machine_mode compare_mode,
5257              int unsignedp, rtx x, rtx y, int normalizep,
5258              machine_mode target_mode)
5259 {
5260   struct expand_operand ops[4];
5261   rtx op0, comparison, subtarget;
5262   rtx_insn *last;
5263   machine_mode result_mode = targetm.cstore_mode (icode);
5264
5265   last = get_last_insn ();  /* Rollback point if the expansion fails.  */
5266   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5267   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5268   if (!x || !y)
5269     {
5270       delete_insns_since (last);
5271       return NULL_RTX;
5272     }
5273
5274   if (target_mode == VOIDmode)
5275     target_mode = result_mode;
5276   if (!target)
5277     target = gen_reg_rtx (target_mode);
5278
5279   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5280   /* Operand 0 is the result, 1 the comparison, 2 and 3 its operands.  */
5281   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5282   create_fixed_operand (&ops[1], comparison);
5283   create_fixed_operand (&ops[2], x);
5284   create_fixed_operand (&ops[3], y);
5285   if (!maybe_expand_insn (icode, 4, ops))
5286     {
5287       delete_insns_since (last);
5288       return NULL_RTX;
5289     }
5290   subtarget = ops[0].value;
5291
5292   /* If we are converting to a wider mode, first convert to
5293      TARGET_MODE, then normalize.  This produces better combining
5294      opportunities on machines that have a SIGN_EXTRACT when we are
5295      testing a single bit.  This mostly benefits the 68k.
5296
5297      If STORE_FLAG_VALUE does not have the sign bit set when
5298      interpreted in RESULT_MODE, we can do this conversion as unsigned,
5299      which is usually more efficient.  */
5300   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5301     {
5302       convert_move (target, subtarget,
5303                     val_signbit_known_clear_p (result_mode,
5304                                                STORE_FLAG_VALUE));
5305       op0 = target;
5306       result_mode = target_mode;
5307     }
5308   else
5309     op0 = subtarget;
5310
5311   /* If we want to keep subexpressions around, don't reuse our last
5312      target.  */
5313   if (optimize)
5314     subtarget = 0;
5315
5316   /* Now normalize to the proper value in RESULT_MODE.  Sometimes we
5317      don't have to do anything.  */
5318   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5319     ;
5320   /* STORE_FLAG_VALUE might be the most negative number, so write
5321      the comparison this way to avoid a compiler-time warning.  */
5322   else if (- normalizep == STORE_FLAG_VALUE)
5323     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5324
5325   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5326      it hard to use a value of just the sign bit due to ANSI integer
5327      constant typing rules.  */
5328   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5329     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5330                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5331                         normalizep == 1);
5332   else
5333     {
5334       gcc_assert (STORE_FLAG_VALUE & 1);
5335
5336       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5337       if (normalizep == -1)
5338         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5339     }
5340
5341   /* If we were converting to a smaller mode, do the conversion now.  */
5342   if (target_mode != result_mode)
5343     {
5344       convert_move (target, op0, 0);
5345       return target;
5346     }
5347   else
5348     return op0;
5349 }
5350
5351
5352 /* A subroutine of emit_store_flag only including "tricks" that do not
5353    need a recursive call.  These are kept separate to avoid infinite
5354    loops.  Returns the flag rtx, or 0 if none of the tricks applies.  */
5355
5356 static rtx
5357 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5358                    machine_mode mode, int unsignedp, int normalizep,
5359                    machine_mode target_mode)
5360 {
5361   rtx subtarget;
5362   enum insn_code icode;
5363   machine_mode compare_mode;
5364   enum mode_class mclass;
5365   enum rtx_code scode;
5366
5367   if (unsignedp)
5368     code = unsigned_condition (code);
5369
5370   /* If one operand is constant, make it the second one.  Only do this
5371      if the other operand is not constant as well.  */
5372   if (swap_commutative_operands_p (op0, op1))
5373     {
5374       std::swap (op0, op1);
5375       code = swap_condition (code);
5376     }
5377
5378   /* SCODE is CODE with its operands exchanged; compute it after the swap.  */
5379   scode = swap_condition (code);
5380   if (mode == VOIDmode)
5381     mode = GET_MODE (op0);
5382
5383   /* For some comparisons with 1 and -1, we can convert this to
5384      comparisons with zero.  This will often produce more opportunities for
5385      store-flag insns.  */
5386
5387   switch (code)
5388     {
5389     case LT:
5390       if (op1 == const1_rtx)
5391         op1 = const0_rtx, code = LE;
5392       break;
5393     case LE:
5394       if (op1 == constm1_rtx)
5395         op1 = const0_rtx, code = LT;
5396       break;
5397     case GE:
5398       if (op1 == const1_rtx)
5399         op1 = const0_rtx, code = GT;
5400       break;
5401     case GT:
5402       if (op1 == constm1_rtx)
5403         op1 = const0_rtx, code = GE;
5404       break;
5405     case GEU:
5406       if (op1 == const1_rtx)
5407         op1 = const0_rtx, code = NE;
5408       break;
5409     case LTU:
5410       if (op1 == const1_rtx)
5411         op1 = const0_rtx, code = EQ;
5412       break;
5413     default:
5414       break;
5415     }
5416
5417   /* If we are comparing a double-word integer with zero or -1, we can
5418      convert the comparison into one involving a single word.  */
5419   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5420       && GET_MODE_CLASS (mode) == MODE_INT
5421       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5422     {
5423       rtx tem;
5424       if ((code == EQ || code == NE)
5425           && (op1 == const0_rtx || op1 == constm1_rtx))
5426         {
5427           rtx op00, op01;
5428
5429           /* Do a logical OR or AND of the two words and compare the
5430              result.  */
5431           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5432           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5433           tem = expand_binop (word_mode,
5434                               op1 == const0_rtx ? ior_optab : and_optab,
5435                               op00, op01, NULL_RTX, unsignedp,
5436                               OPTAB_DIRECT);
5437
5438           if (tem != 0)
5439             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5440                                    unsignedp, normalizep);
5441         }
5442       else if ((code == LT || code == GE) && op1 == const0_rtx)
5443         {
5444           rtx op0h;
5445
5446           /* If testing the sign bit, can just test on high word.  */
5447           op0h = simplify_gen_subreg (word_mode, op0, mode,
5448                                       subreg_highpart_offset (word_mode,
5449                                                               mode));
5450           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5451                                  unsignedp, normalizep);
5452         }
5453       else
5454         tem = NULL_RTX;
5455
5456       if (tem)
5457         {
5458           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5459             return tem;
5460           if (!target)
5461             target = gen_reg_rtx (target_mode);
5462
5463           convert_move (target, tem,
5464                         !val_signbit_known_set_p (word_mode,
5465                                                   (normalizep ? normalizep
5466                                                    : STORE_FLAG_VALUE)));
5467           return target;
5468         }
5469     }
5470
5471   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5472      complement of A (for GE) and shifting the sign bit to the low bit.  */
5473   if (op1 == const0_rtx && (code == LT || code == GE)
5474       && GET_MODE_CLASS (mode) == MODE_INT
5475       && (normalizep || STORE_FLAG_VALUE == 1
5476           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5477     {
5478       subtarget = target;
5479
5480       if (!target)
5481         target_mode = mode;
5482
5483       /* If the result is to be wider than OP0, it is best to convert it
5484          first.  If it is to be narrower, it is *incorrect* to convert it
5485          first.  */
5486       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5487         {
5488           op0 = convert_modes (target_mode, mode, op0, 0);
5489           mode = target_mode;
5490         }
5491
5492       if (target_mode != mode)
5493         subtarget = 0;
5494
5495       if (code == GE)
5496         op0 = expand_unop (mode, one_cmpl_optab, op0,
5497                            ((STORE_FLAG_VALUE == 1 || normalizep)
5498                             ? 0 : subtarget), 0);
5499
5500       if (STORE_FLAG_VALUE == 1 || normalizep)
5501         /* If we are supposed to produce a 0/1 value, we want to do
5502            a logical shift from the sign bit to the low-order bit; for
5503            a -1/0 value, we do an arithmetic shift.  */
5504         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5505                             GET_MODE_BITSIZE (mode) - 1,
5506                             subtarget, normalizep != -1);
5507
5508       if (mode != target_mode)
5509         op0 = convert_modes (target_mode, mode, op0, 0);
5510
5511       return op0;
5512     }
5513
5514   mclass = GET_MODE_CLASS (mode);
5515   for (compare_mode = mode; compare_mode != VOIDmode;
5516        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5517     {
5518       machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5519       icode = optab_handler (cstore_optab, optab_mode);
5520       if (icode != CODE_FOR_nothing)
5521         {
5522           do_pending_stack_adjust ();
5523           rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5524                                  unsignedp, op0, op1, normalizep, target_mode);
5525           if (tem)
5526             return tem;
5527           /* For floating point, also try SCODE with the operands exchanged.  */
5528           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5529             {
5530               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5531                                  unsignedp, op1, op0, normalizep, target_mode);
5532               if (tem)
5533                 return tem;
5534             }
5535           break;
5536         }
5537     }
5538
5539   return 0;
5540 }
5541
5542 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5543    and storing in TARGET.  Normally return TARGET.
5544    Return 0 if that cannot be done.
5545
5546    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5547    it is VOIDmode, they cannot both be CONST_INT.
5548
5549    UNSIGNEDP is for the case where we have to widen the operands
5550    to perform the operation.  It says to use zero-extension.
5551
5552    NORMALIZEP is 1 if we should convert the result to be either zero
5553    or one.  NORMALIZEP is -1 if we should convert the result to be
5554    either zero or -1.  If NORMALIZEP is zero, the result will be left
5555    "raw" out of the scc insn.  */
5556
5557 rtx
5558 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5559                  machine_mode mode, int unsignedp, int normalizep)
5560 {
5561   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5562   enum rtx_code rcode;
5563   rtx subtarget;
5564   rtx tem, trueval;
5565   rtx_insn *last;
5566
5567   /* If we compare constants, we shouldn't use a store-flag operation,
5568      but a constant load.  We can get there via the vanilla route that
5569      usually generates a compare-branch sequence, but will in this case
5570      fold the comparison to a constant, and thus elide the branch.  */
5571   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5572     return NULL_RTX;
5573
5574   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5575                            target_mode);
5576   if (tem)
5577     return tem;
5578
5579   /* If we reached here, we can't do this with a scc insn, however there
5580      are some comparisons that can be done in other ways.  Don't do any
5581      of these cases if branches are very cheap.  */
5582   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5583     return 0;
5584
5585   /* See what we need to return.  We can only return a 1, -1, or the
5586      sign bit.  */
5587
5588   if (normalizep == 0)
5589     {
5590       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5591         normalizep = STORE_FLAG_VALUE;
5592
5593       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5594         ;
5595       else
5596         return 0;
5597     }
5598
5599   last = get_last_insn ();  /* Rollback point for failed attempts.  */
5600
5601   /* If optimizing, use different pseudo registers for each insn, instead
5602      of reusing the same pseudo.  This leads to better CSE, but slows
5603      down the compiler, since there are more pseudos.  */
5604   subtarget = (!optimize
5605                && (target_mode == mode)) ? target : NULL_RTX;
5606   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5607
5608   /* For floating-point comparisons, try the reverse comparison or try
5609      changing the "orderedness" of the comparison.  */
5610   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5611     {
5612       enum rtx_code first_code;
5613       bool and_them;
5614
5615       rcode = reverse_condition_maybe_unordered (code);
5616       if (can_compare_p (rcode, mode, ccp_store_flag)
5617           && (code == ORDERED || code == UNORDERED
5618               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5619               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5620         {
5621           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5622                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5623
5624           /* For the reverse comparison, use either an addition or a XOR.  */
5625           if (want_add
5626               && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5627                            optimize_insn_for_speed_p ()) == 0)
5628             {
5629               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5630                                        STORE_FLAG_VALUE, target_mode);
5631               if (tem)
5632                 return expand_binop (target_mode, add_optab, tem,
5633                                      gen_int_mode (normalizep, target_mode),
5634                                      target, 0, OPTAB_WIDEN);
5635             }
5636           else if (!want_add
5637                    && rtx_cost (trueval, mode, XOR, 1,
5638                                 optimize_insn_for_speed_p ()) == 0)
5639             {
5640               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5641                                        normalizep, target_mode);
5642               if (tem)
5643                 return expand_binop (target_mode, xor_optab, tem, trueval,
5644                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5645             }
5646         }
5647
5648       delete_insns_since (last);
5649
5650       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5651       if (code == ORDERED || code == UNORDERED)
5652         return 0;
5653
5654       and_them = split_comparison (code, mode, &first_code, &code);
5655
5656       /* If there are no NaNs, the first comparison should always fall through.
5657          Effectively change the comparison to the other one.  */
5658       if (!HONOR_NANS (mode))
5659         {
5660           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5661           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5662                                     target_mode);
5663         }
5664
5665       if (!HAVE_conditional_move)
5666         return 0;
5667
5668       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5669          conditional move.  */
5670       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5671                                normalizep, target_mode);
5672       if (tem == 0)
5673         return 0;
5674
5675       if (and_them)
5676         tem = emit_conditional_move (target, code, op0, op1, mode,
5677                                      tem, const0_rtx, GET_MODE (tem), 0);
5678       else
5679         tem = emit_conditional_move (target, code, op0, op1, mode,
5680                                      trueval, tem, GET_MODE (tem), 0);
5681
5682       if (tem == 0)
5683         delete_insns_since (last);
5684       return tem;
5685     }
5686
5687   /* The remaining tricks only apply to integer comparisons.  */
5688
5689   if (GET_MODE_CLASS (mode) != MODE_INT)
5690     return 0;
5691
5692   /* If this is an equality comparison of integers, we can try to exclusive-or
5693      (or subtract) the two operands and use a recursive call to try the
5694      comparison with zero.  (The case of very cheap branches has already
5695      returned above.)  */
5696
5697   if ((code == EQ || code == NE) && op1 != const0_rtx)
5698     {
5699       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5700                           OPTAB_WIDEN);
5701
5702       if (tem == 0)
5703         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5704                             OPTAB_WIDEN);
5705       if (tem != 0)
5706         tem = emit_store_flag (target, code, tem, const0_rtx,
5707                                mode, unsignedp, normalizep);
5708       if (tem != 0)
5709         return tem;
5710
5711       delete_insns_since (last);
5712     }
5713
5714   /* For integer comparisons, try the reverse comparison.  However, for
5715      small X and if we'd have anyway to extend, implementing "X != 0"
5716      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5717   rcode = reverse_condition (code);
5718   if (can_compare_p (rcode, mode, ccp_store_flag)
5719       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5720             && code == NE
5721             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5722             && op1 == const0_rtx))
5723     {
5724       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5725                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5726
5727       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5728       if (want_add
5729           && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5730                        optimize_insn_for_speed_p ()) == 0)
5731         {
5732           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5733                                    STORE_FLAG_VALUE, target_mode);
5734           if (tem != 0)
5735             tem = expand_binop (target_mode, add_optab, tem,
5736                                 gen_int_mode (normalizep, target_mode),
5737                                 target, 0, OPTAB_WIDEN);
5738         }
5739       else if (!want_add
5740                && rtx_cost (trueval, mode, XOR, 1,
5741                             optimize_insn_for_speed_p ()) == 0)
5742         {
5743           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5744                                    normalizep, target_mode);
5745           if (tem != 0)
5746             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5747                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5748         }
5749
5750       if (tem != 0)
5751         return tem;
5752       delete_insns_since (last);
5753     }
5754
5755   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5756      the constant zero.  Reject all other comparisons at this point.  Only
5757      do LE and GT if branches are expensive since they are expensive on
5758      2-operand machines.  */
5759
5760   if (op1 != const0_rtx
5761       || (code != EQ && code != NE
5762           && (BRANCH_COST (optimize_insn_for_speed_p (),
5763                            false) <= 1 || (code != LE && code != GT))))
5764     return 0;
5765
5766   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5767      do the necessary operation below.  */
5768
5769   tem = 0;
5770
5771   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5772      the sign bit set.  */
5773
5774   if (code == LE)
5775     {
5776       /* This is destructive, so SUBTARGET can't be OP0.  */
5777       if (rtx_equal_p (subtarget, op0))
5778         subtarget = 0;
5779
5780       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5781                           OPTAB_WIDEN);
5782       if (tem)
5783         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5784                             OPTAB_WIDEN);
5785     }
5786
5787   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5788      number of bits in the mode of OP0, minus one.  */
5789
5790   if (code == GT)
5791     {
5792       if (rtx_equal_p (subtarget, op0))
5793         subtarget = 0;
5794
5795       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5796                           GET_MODE_BITSIZE (mode) - 1,
5797                           subtarget, 0);
5798       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5799                           OPTAB_WIDEN);
5800     }
5801
5802   if (code == EQ || code == NE)
5803     {
5804       /* For EQ or NE, one way to do the comparison is to apply an operation
5805          that converts the operand into a positive number if it is nonzero
5806          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5807          for NE we negate.  This puts the result in the sign bit.  Then we
5808          normalize with a shift, if needed.
5809
5810          Two operations that can do the above actions are ABS and FFS, so try
5811          them.  If that doesn't work, and MODE is smaller than a full word,
5812          we can use zero-extension to the wider mode (an unsigned conversion)
5813          as the operation.  */
5814
5815       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5816          that is compensated by the subsequent overflow when subtracting
5817          one / negating.  */
5818
5819       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5820         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5821       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5822         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5823       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5824         {
5825           tem = convert_modes (word_mode, mode, op0, 1);
5826           mode = word_mode;
5827         }
5828
5829       if (tem != 0)
5830         {
5831           if (code == EQ)
5832             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5833                                 0, OPTAB_WIDEN);
5834           else
5835             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5836         }
5837
5838       /* If we couldn't do it that way, for NE we can "or" the two's complement
5839          of the value with itself.  For EQ, we take the one's complement of
5840          that "or", which is an extra insn, so we only handle EQ if branches
5841          are expensive.  */
5842
5843       if (tem == 0
5844           && (code == NE
5845               || BRANCH_COST (optimize_insn_for_speed_p (),
5846                               false) > 1))
5847         {
5848           if (rtx_equal_p (subtarget, op0))
5849             subtarget = 0;
5850
5851           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5852           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5853                               OPTAB_WIDEN);
5854
5855           if (tem && code == EQ)
5856             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5857         }
5858     }
5859
5860   if (tem && normalizep)
5861     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5862                         GET_MODE_BITSIZE (mode) - 1,
5863                         subtarget, normalizep == 1);
5864
5865   if (tem)
5866     {
5867       if (!target)
5868         ;
5869       else if (GET_MODE (tem) != target_mode)
5870         {
5871           convert_move (target, tem, 0);
5872           tem = target;
5873         }
5874       else if (!subtarget)
5875         {
5876           emit_move_insn (target, tem);
5877           tem = target;
5878         }
5879     }
5880   else
5881     delete_insns_since (last);
5882
5883   return tem;
5884 }
5885
5886 /* Like emit_store_flag, but never fails: when no store-flag sequence is
5887    available, fall back to explicit moves around a conditional jump.  */
5888 rtx
5889 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5890                        machine_mode mode, int unsignedp, int normalizep)
5891 {
5892   /* Give emit_store_flag the first chance.  */
5893   rtx flag = emit_store_flag (target, code, op0, op1, mode, unsignedp,
5894                               normalizep);
5895   if (flag)
5896     return flag;
5897
5898   if (target == NULL_RTX)
5899     target = gen_reg_rtx (word_mode);
5900
5901   /* From here on the value is built with set/compare/jump/set code.  */
5902   rtx on_true = normalizep ? GEN_INT (normalizep) : const1_rtx;
5903
5904   /* Testing foo != 0 with foo already in TARGET: leave a zero alone and
5905      overwrite any nonzero value with the "true" value.  */
5906   bool in_place_ne = (code == NE
5907                       && GET_MODE_CLASS (mode) == MODE_INT
5908                       && REG_P (target)
5909                       && op0 == target
5910                       && op1 == const0_rtx);
5911   if (in_place_ne)
5912     {
5913       rtx_code_label *skip = gen_label_rtx ();
5914       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5915                                NULL_RTX, NULL, skip, -1);
5916       emit_move_insn (target, on_true);
5917       emit_label (skip);
5918       return target;
5919     }
5920
5921   /* TARGET is written before the comparison, so it must be a register
5922      that neither operand mentions; otherwise use a fresh pseudo.  */
5923   if (!REG_P (target)
5924       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5925     target = gen_reg_rtx (GET_MODE (target));
5926
5927   /* Jump in the right direction if the target cannot implement CODE
5928      but can jump on its reverse condition.  */
5929   rtx on_false = const0_rtx;
5930   if (!can_compare_p (code, mode, ccp_jump)
5931       && (!FLOAT_MODE_P (mode)
5932           || code == ORDERED || code == UNORDERED
5933           || (!HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5934           || (!HONOR_SNANS (mode) && (code == EQ || code == NE))))
5935     {
5936       enum rtx_code rcode = (FLOAT_MODE_P (mode)
5937                              ? reverse_condition_maybe_unordered (code)
5938                              : reverse_condition (code));
5939
5940       /* Canonicalize to UNORDERED for the libcall.  */
5941       if (can_compare_p (rcode, mode, ccp_jump)
5942           || (code == ORDERED && !can_compare_p (ORDERED, mode, ccp_jump)))
5943         {
5944           /* Reverse the test and exchange the two stored values.  */
5945           std::swap (on_true, on_false);
5946           code = rcode;
5947         }
5948     }
5949
5950   /* Store the "true" value, skip ahead if the comparison holds, and
5951      otherwise overwrite it with the "false" value.  */
5952   emit_move_insn (target, on_true);
5953   rtx_code_label *done = gen_label_rtx ();
5954   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5955                            done, -1);
5956   emit_move_insn (target, on_false);
5957   emit_label (done);
5958   return target;
5959 }
5960 \f
5961 /* Emit a (possibly multi-word) comparison of ARG1 and ARG2, both of mode
5962    MODE, that jumps to LABEL when ARG1 OP ARG2 holds.  The comparison is
5963    treated as unsigned exactly when OP is LTU, LEU, GTU or GEU; the real
5964    work is delegated to do_compare_rtx_and_jump.  */
5965 static void
5966 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5967                  rtx_code_label *label)
5968 {
5969   bool is_unsigned = op == LTU || op == LEU || op == GTU || op == GEU;
5970   do_compare_rtx_and_jump (arg1, arg2, op, is_unsigned ? 1 : 0, mode,
5971                            NULL_RTX, NULL, label, -1);
5972 }