gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2016 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "backend.h"
  26 #include "target.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "predict.h"
  30 #include "tm_p.h"
  31 #include "expmed.h"
  32 #include "optabs.h"
  33 #include "emit-rtl.h"
  34 #include "diagnostic-core.h"
  35 #include "fold-const.h"
  36 #include "stor-layout.h"
  37 #include "dojump.h"
  38 #include "explow.h"
  39 #include "expr.h"
  40 #include "langhooks.h"
   41
      /* Storage for the default target_expmed instance.  When
         SWITCHABLE_TARGET is nonzero, this_target_expmed is also defined
         and initially points at that default instance.  */
   42 struct target_expmed default_target_expmed;
   43 #if SWITCHABLE_TARGET
   44 struct target_expmed *this_target_expmed = &default_target_expmed;
   45 #endif
   46
      /* Forward declarations of file-local (static) helper routines.  */
   47 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
   48                                    unsigned HOST_WIDE_INT,
   49                                    unsigned HOST_WIDE_INT,
   50                                    unsigned HOST_WIDE_INT,
   51                                    rtx, bool);
   52 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
   53                                      unsigned HOST_WIDE_INT,
   54                                      rtx, bool);
   55 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
   56                                    unsigned HOST_WIDE_INT,
   57                                    unsigned HOST_WIDE_INT,
   58                                    unsigned HOST_WIDE_INT,
   59                                    rtx, bool);
   60 static rtx extract_fixed_bit_field (machine_mode, rtx,
   61                                     unsigned HOST_WIDE_INT,
   62                                     unsigned HOST_WIDE_INT, rtx, int, bool);
   63 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
   64                                       unsigned HOST_WIDE_INT,
   65                                       unsigned HOST_WIDE_INT, rtx, int, bool);
   66 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
   67 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
   68                                     unsigned HOST_WIDE_INT, int, bool);
   69 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
   70 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
   71 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  72
   73 /* Return a constant integer mask value of mode MODE with BITSIZE ones
   74    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
   75    The mask is truncated if necessary to the width of mode MODE.  The
   76    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.

         The mask is computed by wi::shifted_mask at a precision of
         GET_MODE_PRECISION (MODE) bits and then turned into an rtx by
         immed_wide_int_const.  */
   77
   78 static inline rtx
   79 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
   80 {
   81   return immed_wide_int_const
   82     (wi::shifted_mask (bitpos, bitsize, complement,
   83                        GET_MODE_PRECISION (mode)), mode);
   84 }
  85
   86 /* Test whether a value is zero or a power of two.  The argument X is
         expanded twice, so it should be free of side effects.  */
   87 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
   88   (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
   89
      /* Scratch expressions used by init_expmed and its helpers to query
         operation costs.  init_expmed builds each expression once; the
         helpers then patch modes (and some operands) in place before
         passing the expression to set_src_cost.  The per-field notes
         describe the shape given to each expression by init_expmed.  */
   90 struct init_expmed_rtl
   91 {
   92   rtx reg;          /* Register operand shared by the expressions below.  */
   93   rtx plus;         /* PLUS (reg, reg).  */
   94   rtx neg;          /* NEG (reg).  */
   95   rtx mult;         /* MULT (reg, reg).  */
   96   rtx sdiv;         /* DIV (reg, reg).  */
   97   rtx udiv;         /* UDIV (reg, reg).  */
   98   rtx sdiv_32;      /* DIV (reg, pow2[5]), i.e. division by 32.  */
   99   rtx smod_32;      /* MOD (reg, pow2[5]), i.e. modulus by 32.  */
  100   rtx wide_mult;    /* MULT (zext, zext).  */
  101   rtx wide_lshr;    /* LSHIFTRT (wide_mult, count); count is patched later.  */
  102   rtx wide_trunc;   /* TRUNCATE (wide_lshr).  */
  103   rtx shift;        /* ASHIFT (reg, count); count is patched to cint[M].  */
  104   rtx shift_mult;   /* MULT (reg, factor); factor is patched to pow2[M].  */
  105   rtx shift_add;    /* PLUS (shift_mult, reg).  */
  106   rtx shift_sub0;   /* MINUS (shift_mult, reg).  */
  107   rtx shift_sub1;   /* MINUS (reg, shift_mult).  */
  108   rtx zext;         /* ZERO_EXTEND (reg).  */
  109   rtx trunc;        /* TRUNCATE (reg).  */
  110
        /* pow2[M] is GEN_INT (1 << M) and cint[M] is GEN_INT (M).  init_expmed
           fills entries 1 .. MAX_BITS_PER_WORD - 1 only; entry 0 stays null.  */
  111   rtx pow2[MAX_BITS_PER_WORD];
  112   rtx cint[MAX_BITS_PER_WORD];
  113 };
  114
      /* Record the cost of converting a value from FROM_MODE to TO_MODE for
         the SPEED setting.  The conversion is modelled by ALL->trunc when
         the (adjusted) precision of TO_MODE is smaller than that of
         FROM_MODE, and by ALL->zext otherwise.  The chosen expression is
         costed in TO_MODE with ALL->reg set to FROM_MODE, and the result is
         stored with set_convert_cost.  The mode of ALL->reg is left as
         FROM_MODE on return.  */
  115 static void
  116 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
  117                       machine_mode from_mode, bool speed)
  118 {
  119   int to_size, from_size;
  120   rtx which;
  121
  122   to_size = GET_MODE_PRECISION (to_mode);
  123   from_size = GET_MODE_PRECISION (from_mode);
  124
  125   /* Most partial integers have a precision less than the "full"
  126      integer it requires for storage.  In case one doesn't, for
  127      comparison purposes here, reduce the bit size by one in that
  128      case.  */
  129   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
  130       && exact_log2 (to_size) != -1)
  131     to_size --;
  132   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
  133       && exact_log2 (from_size) != -1)
  134     from_size --;
  135
  136   /* Assume cost of zero-extend and sign-extend is the same.  */
  137   which = (to_size < from_size ? all->trunc : all->zext);
  138
        /* The operand of both candidate expressions is ALL->reg, so giving it
           FROM_MODE makes WHICH describe a FROM_MODE -> TO_MODE conversion.  */
  139   PUT_MODE (all->reg, from_mode);
  140   set_convert_cost (to_mode, from_mode, speed,
  141                     set_src_cost (which, to_mode, speed));
  142 }
  143
      /* Compute and record the operation costs for MODE under the SPEED
         setting (init_expmed passes 0 and 1).  ALL holds the scratch
         expressions; they are switched to MODE in place and costed with
         set_src_cost.  The function records:
           - add, negate, multiply, signed divide and unsigned divide costs;
           - whether signed division and signed modulus by a power of two
             count as cheap (division by 32 costing at most 2 adds, modulus
             by 32 costing at most 4 adds);
           - shift, shift-and-add and shift-and-subtract costs for every
             shift count below MIN (MAX_BITS_PER_WORD, unit bit size of MODE);
           - for scalar integer modes, the conversion cost from each mode in
             the MIN_MODE_INT .. MAX_MODE_INT range;
           - for MODE_INT modes that have a wider mode, the widening multiply
             and highpart multiply costs.  */
  144 static void
  145 init_expmed_one_mode (struct init_expmed_rtl *all,
  146                       machine_mode mode, int speed)
  147 {
  148   int m, n, mode_bitsize;
  149   machine_mode mode_from;
  150
  151   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
  152
        /* Retarget every scratch expression to MODE.  wide_mult and wide_lshr
           are not touched here; they are given the wider mode further down.  */
  153   PUT_MODE (all->reg, mode);
  154   PUT_MODE (all->plus, mode);
  155   PUT_MODE (all->neg, mode);
  156   PUT_MODE (all->mult, mode);
  157   PUT_MODE (all->sdiv, mode);
  158   PUT_MODE (all->udiv, mode);
  159   PUT_MODE (all->sdiv_32, mode);
  160   PUT_MODE (all->smod_32, mode);
  161   PUT_MODE (all->wide_trunc, mode);
  162   PUT_MODE (all->shift, mode);
  163   PUT_MODE (all->shift_mult, mode);
  164   PUT_MODE (all->shift_add, mode);
  165   PUT_MODE (all->shift_sub0, mode);
  166   PUT_MODE (all->shift_sub1, mode);
  167   PUT_MODE (all->zext, mode);
  168   PUT_MODE (all->trunc, mode);
  169
  170   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
  171   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
  172   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
  173   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
  174   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
  175
        /* These compare against the add cost recorded just above.  */
  176   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
  177                                      <= 2 * add_cost (speed, mode)));
  178   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
  179                                      <= 4 * add_cost (speed, mode)));
  180
        /* A shift by zero is recorded as free, and the combined shift-add and
           shift-subtract forms with a zero count cost the same as an add.  */
  181   set_shift_cost (speed, mode, 0, 0);
  182   {
  183     int cost = add_cost (speed, mode);
  184     set_shiftadd_cost (speed, mode, 0, cost);
  185     set_shiftsub0_cost (speed, mode, 0, cost);
  186     set_shiftsub1_cost (speed, mode, 0, cost);
  187   }
  188
        /* For each nonzero shift count M, patch the count into the shift
           expression and the matching power of two into the multiply that is
           the shifted operand of the add/subtract forms, then cost them.  */
  189   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
  190   for (m = 1; m < n; m++)
  191     {
  192       XEXP (all->shift, 1) = all->cint[m];
  193       XEXP (all->shift_mult, 1) = all->pow2[m];
  194
  195       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
  196       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
  197                                                        speed));
  198       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
  199                                                         speed));
  200       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
  201                                                         speed));
  202     }
  203
        /* Conversion costs into MODE from every mode in the MODE_INT range.
           init_expmed_one_conv changes the mode of ALL->reg as it goes.  */
  204   if (SCALAR_INT_MODE_P (mode))
  205     {
  206       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
  207            mode_from = (machine_mode)(mode_from + 1))
  208         init_expmed_one_conv (all, mode, mode_from, speed);
  209     }
  210   if (GET_MODE_CLASS (mode) == MODE_INT)
  211     {
  212       machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
  213       if (wider_mode != VOIDmode)
  214         {
                /* Model a widening multiply as a WIDER_MODE product of two
                   zero-extended operands, and a highpart multiply as that
                   product shifted right by the bit size of MODE and truncated
                   back to MODE.  */
  215           PUT_MODE (all->zext, wider_mode);
  216           PUT_MODE (all->wide_mult, wider_mode);
  217           PUT_MODE (all->wide_lshr, wider_mode);
  218           XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
  219
  220           set_mul_widen_cost (speed, wider_mode,
  221                               set_src_cost (all->wide_mult, wider_mode, speed));
  222           set_mul_highpart_cost (speed, mode,
  223                                  set_src_cost (all->wide_trunc, mode, speed));
  224         }
  225     }
  226 }
  227
      /* Initialize the cost information used by this file.  Build the
         scratch expressions in a local init_expmed_rtl, run
         init_expmed_one_mode for every mode in the MODE_INT,
         MODE_PARTIAL_INT and MODE_VECTOR_INT ranges for both values of
         SPEED (0 and 1), reset or enable the algorithm hash, call
         default_rtl_profile, and finally free the scratch expressions.  */
  228 void
  229 init_expmed (void)
  230 {
  231   struct init_expmed_rtl all;
  232   machine_mode mode = QImode;
  233   int m, speed;
  234
        /* Entry 0 of pow2[] and cint[] keeps the null value from the memset;
           only entries 1 .. MAX_BITS_PER_WORD - 1 are created.  */
  235   memset (&all, 0, sizeof all);
  236   for (m = 1; m < MAX_BITS_PER_WORD; m++)
  237     {
  238       all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
  239       all.cint[m] = GEN_INT (m);
  240     }
  241
  242   /* Avoid using hard regs in ways which may be unsupported.  */
  243   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
        /* All expressions are first built in QImode.  Where all.reg appears as
           the second operand of wide_lshr, shift and shift_mult it is only a
           placeholder that init_expmed_one_mode overwrites.  */
  244   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
  245   all.neg = gen_rtx_NEG (mode, all.reg);
  246   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
  247   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
  248   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
  249   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
  250   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
  251   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
  252   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
  253   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
  254   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
  255   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
  256   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
  257   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
  258   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
  259   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
  260   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
  261
  262   for (speed = 0; speed < 2; speed++)
  263     {
  264       crtl->maybe_hot_insn_p = speed;
            /* NOTE(review): MODE is QImode only on the first iteration; on the
               second it holds whatever value the mode loops below left in it.
               Confirm that the cost of const0_rtx does not depend on MODE.  */
  265       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
  266
  267       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
  268            mode = (machine_mode)(mode + 1))
  269         init_expmed_one_mode (&all, mode, speed);
  270
            /* The partial-int and vector-int ranges are walked only when the
               corresponding MIN_MODE_* value is not VOIDmode.  */
  271       if (MIN_MODE_PARTIAL_INT != VOIDmode)
  272         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
  273              mode = (machine_mode)(mode + 1))
  274           init_expmed_one_mode (&all, mode, speed);
  275
  276       if (MIN_MODE_VECTOR_INT != VOIDmode)
  277         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
  278              mode = (machine_mode)(mode + 1))
  279           init_expmed_one_mode (&all, mode, speed);
  280     }
  281
        /* If the algorithm hash has been used before, clear all of its
           entries; otherwise just mark it as in use from now on.  */
  282   if (alg_hash_used_p ())
  283     {
  284       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
  285       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
  286     }
  287   else
  288     set_alg_hash_used_p (true);
        /* Presumably resets the profile state changed through
           crtl->maybe_hot_insn_p above -- confirm against default_rtl_profile.  */
  289   default_rtl_profile ();
  290
        /* Release the scratch expressions in reverse order of creation.  */
  291   ggc_free (all.trunc);
  292   ggc_free (all.shift_sub1);
  293   ggc_free (all.shift_sub0);
  294   ggc_free (all.shift_add);
  295   ggc_free (all.shift_mult);
  296   ggc_free (all.shift);
  297   ggc_free (all.wide_trunc);
  298   ggc_free (all.wide_lshr);
  299   ggc_free (all.wide_mult);
  300   ggc_free (all.zext);
  301   ggc_free (all.smod_32);
  302   ggc_free (all.sdiv_32);
  303   ggc_free (all.udiv);
  304   ggc_free (all.sdiv);
  305   ggc_free (all.mult);
  306   ggc_free (all.neg);
  307   ggc_free (all.plus);
  308   ggc_free (all.reg);
  309 }
 310
  311 /* Return an rtx representing minus the value of X.
  312    MODE is the intended mode of the result,
  313    useful if X is a CONST_INT.

         The negation is first attempted with simplify_unary_operation; if
         that returns null, the function falls back to expand_unop with
         neg_optab.  */
  314
  315 rtx
  316 negate_rtx (machine_mode mode, rtx x)
  317 {
  318   rtx result = simplify_unary_operation (NEG, mode, x, mode);
  319
  320   if (result == 0)
  321     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
  322
  323   return result;
  324 }
 325
  326 /* Whether reverse storage order is supported on the target.
         -1 means not checked yet, 0 means unsupported, 1 means supported.  */
  327 static int reverse_storage_order_supported = -1;
  328
  329 /* Check whether reverse storage order is supported on the target.
         Support requires BYTES_BIG_ENDIAN and WORDS_BIG_ENDIAN to agree; when
         they differ, a "sorry" diagnostic is issued.  The outcome is stored
         in reverse_storage_order_supported.  */
  330
  331 static void
  332 check_reverse_storage_order_support (void)
  333 {
  334   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
  335     {
  336       reverse_storage_order_supported = 0;
  337       sorry ("reverse scalar storage order");
  338     }
  339   else
  340     reverse_storage_order_supported = 1;
  341 }
 342
  343 /* Whether reverse FP storage order is supported on the target.
         -1 means not checked yet, 0 means unsupported, 1 means supported.  */
  344 static int reverse_float_storage_order_supported = -1;
  345
  346 /* Check whether reverse FP storage order is supported on the target.
         Support requires FLOAT_WORDS_BIG_ENDIAN and WORDS_BIG_ENDIAN to agree;
         when they differ, a "sorry" diagnostic is issued.  The outcome is
         stored in reverse_float_storage_order_supported.  */
  347
  348 static void
  349 check_reverse_float_storage_order_support (void)
  350 {
  351   if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
  352     {
  353       reverse_float_storage_order_supported = 0;
  354       sorry ("reverse floating-point scalar storage order");
  355     }
  356   else
  357     reverse_float_storage_order_supported = 1;
  358 }
 359
 360 /* Return an rtx representing value of X with reverse storage order.
 361    MODE is the intended mode of the result,
 362    useful if X is a CONST_INT.  */
 363
 364 rtx
 365 flip_storage_order (enum machine_mode mode, rtx x)
 366 {
 367   enum machine_mode int_mode;
 368   rtx result;
 369
 370   if (mode == QImode)
 371     return x;
 372
 373   if (COMPLEX_MODE_P (mode))
 374     {
 375       rtx real = read_complex_part (x, false);
 376       rtx imag = read_complex_part (x, true);
 377
 378       real = flip_storage_order (GET_MODE_INNER (mode), real);
 379       imag = flip_storage_order (GET_MODE_INNER (mode), imag);
 380
 381       return gen_rtx_CONCAT (mode, real, imag);
 382     }
 383
 384   if (__builtin_expect (reverse_storage_order_supported < 0, 0))
 385     check_reverse_storage_order_support ();
 386
 387   if (SCALAR_INT_MODE_P (mode))
 388     int_mode = mode;
 389   else
 390     {
 391       if (FLOAT_MODE_P (mode)
 392           && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
 393         check_reverse_float_storage_order_support ();
 394
 395       int_mode = mode_for_size (GET_MODE_PRECISION (mode), MODE_INT, 0);
 396       if (int_mode == BLKmode)
 397         {
 398           sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
 399           return x;
 400         }
 401       x = gen_lowpart (int_mode, x);
 402     }
 403
 404   result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
 405   if (result == 0)
 406     result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
 407
 408   if (int_mode != mode)
 409     result = gen_lowpart (mode, result);
 410
 411   return result;
 412 }
 413
  414 /* Adjust bitfield memory MEM so that it points to the first unit of mode
  415    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
  416    If MODE is BLKmode, return a reference to every byte in the bitfield.
  417    Set *NEW_BITNUM to the bit position of the field within the new memory.  */
  418
  419 static rtx
  420 narrow_bit_field_mem (rtx mem, machine_mode mode,
  421                       unsigned HOST_WIDE_INT bitsize,
  422                       unsigned HOST_WIDE_INT bitnum,
  423                       unsigned HOST_WIDE_INT *new_bitnum)
  424 {
  425   if (mode == BLKmode)
  426     {
            /* Start at the byte containing BITNUM and cover as many whole bytes
               as are needed to hold the bits up to the end of the field.  */
  427       *new_bitnum = bitnum % BITS_PER_UNIT;
  428       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
  429       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
  430                             / BITS_PER_UNIT);
  431       return adjust_bitfield_address_size (mem, mode, offset, size);
  432     }
  433   else
  434     {
            /* Round BITNUM down to a multiple of the bit size of MODE; OFFSET
               is that rounded position expressed in bytes.  */
  435       unsigned int unit = GET_MODE_BITSIZE (mode);
  436       *new_bitnum = bitnum % unit;
  437       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
  438       return adjust_bitfield_address (mem, mode, offset);
  439     }
  440 }
 441
  442 /* The caller wants to perform insertion or extraction PATTERN on a
  443    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
  444    BITREGION_START and BITREGION_END are as for store_bit_field
  445    and FIELDMODE is the natural mode of the field.
  446
  447    Search for a mode that is compatible with the memory access
  448    restrictions and (where applicable) with a register insertion or
  449    extraction.  Return the new memory on success, storing the adjusted
  450    bit position in *NEW_BITNUM.  Return null otherwise.

         NOTE(review): BITSIZE and BITNUM are declared as signed
         HOST_WIDE_INT here, whereas the neighbouring helpers take them as
         unsigned HOST_WIDE_INT.  */
  451
  452 static rtx
  453 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
  454                               rtx op0, HOST_WIDE_INT bitsize,
  455                               HOST_WIDE_INT bitnum,
  456                               unsigned HOST_WIDE_INT bitregion_start,
  457                               unsigned HOST_WIDE_INT bitregion_end,
  458                               machine_mode fieldmode,
  459                               unsigned HOST_WIDE_INT *new_bitnum)
  460 {
        /* The iterator yields candidate access modes given the alignment and
           volatility of OP0; the first one it returns is the starting point.  */
  461   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
  462                                 bitregion_end, MEM_ALIGN (op0),
  463                                 MEM_VOLATILE_P (op0));
  464   machine_mode best_mode;
  465   if (iter.next_mode (&best_mode))
  466     {
  467       /* We can use a memory in BEST_MODE.  See whether this is true for
  468          any wider modes.  All other things being equal, we prefer to
  469          use the widest mode possible because it tends to expose more
  470          CSE opportunities.  */
  471       if (!iter.prefer_smaller_modes ())
  472         {
  473           /* Limit the search to the mode required by the corresponding
  474              register insertion or extraction instruction, if any.  */
  475           machine_mode limit_mode = word_mode;
  476           extraction_insn insn;
  477           if (get_best_reg_extraction_insn (&insn, pattern,
  478                                             GET_MODE_BITSIZE (best_mode),
  479                                             fieldmode))
  480             limit_mode = insn.field_mode;
  481
                /* Keep taking modes from the iterator while they are no larger
                   than LIMIT_MODE; the last accepted one wins.  */
  482           machine_mode wider_mode;
  483           while (iter.next_mode (&wider_mode)
  484                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
  485             best_mode = wider_mode;
  486         }
  487       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
  488                                    new_bitnum);
  489     }
  490   return NULL_RTX;
  491 }
 492
  493 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
  494    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
  495    offset is then BITNUM / BITS_PER_UNIT.

         Note that the parameters are passed in the order BITNUM, BITSIZE,
         STRUCT_MODE.  When BYTES_BIG_ENDIAN is zero, only BITNUM is examined:
         the field qualifies whenever it starts on a word boundary.  */
  496
  497 static bool
  498 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
  499                      unsigned HOST_WIDE_INT bitsize,
  500                      machine_mode struct_mode)
  501 {
        /* On a big-endian target the field must start on a byte boundary and
           end either at the end of STRUCT_MODE or on a word boundary.  */
  502   if (BYTES_BIG_ENDIAN)
  503     return (bitnum % BITS_PER_UNIT == 0
  504             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
  505                 || (bitnum + bitsize) % BITS_PER_WORD == 0));
  506   else
  507     return bitnum % BITS_PER_WORD == 0;
  508 }
 509
  510 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
  511    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
  512    Return false if the access would touch memory outside the range
  513    BITREGION_START to BITREGION_END for conformance to the C++ memory
  514    model.  A BITREGION_END of zero disables that range check.  */
  515
  516 static bool
  517 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
  518                             unsigned HOST_WIDE_INT bitnum,
  519                             machine_mode fieldmode,
  520                             unsigned HOST_WIDE_INT bitregion_start,
  521                             unsigned HOST_WIDE_INT bitregion_end)
  522 {
  523   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
  524
  525   /* -fstrict-volatile-bitfields must be enabled and we must have a
  526      volatile MEM.  */
  527   if (!MEM_P (op0)
  528       || !MEM_VOLATILE_P (op0)
  529       || flag_strict_volatile_bitfields <= 0)
  530     return false;
  531
  532   /* Non-integral modes likely only happen with packed structures.
  533      Punt.  */
  534   if (!SCALAR_INT_MODE_P (fieldmode))
  535     return false;
  536
  537   /* The bit size must not be larger than the field mode, and
  538      the field mode must not be larger than a word.  */
  539   if (bitsize > modesize || modesize > BITS_PER_WORD)
  540     return false;
  541
  542   /* Check for cases of unaligned fields that must be split.  */
  543   if (bitnum % modesize + bitsize > modesize)
  544     return false;
  545
  546   /* The memory must be sufficiently aligned for a MODESIZE access.
  547      This condition guarantees, that the memory access will not
  548      touch anything after the end of the structure.  */
  549   if (MEM_ALIGN (op0) < modesize)
  550     return false;
  551
  552   /* Check for cases where the C++ memory model applies.  The access
           covers the MODESIZE-bit unit containing BITNUM; both ends of that
           unit must lie within the bit region.  */
  553   if (bitregion_end != 0
  554       && (bitnum - bitnum % modesize < bitregion_start
  555           || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
  556     return false;
  557
  558   return true;
  559 }
 560
  561 /* Return true if OP0 is a memory and if a bitfield of size BITSIZE at
  562    bit number BITNUM can be treated as a simple value of mode MODE.

         This requires the field to start on a byte boundary and to be exactly
         as wide as MODE.  In addition, either unaligned accesses in MODE must
         not be slow for the alignment of OP0, or both BITNUM and the
         alignment of OP0 must satisfy the alignment of MODE.  */
  563
  564 static bool
  565 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
  566                        unsigned HOST_WIDE_INT bitnum, machine_mode mode)
  567 {
  568   return (MEM_P (op0)
  569           && bitnum % BITS_PER_UNIT == 0
  570           && bitsize == GET_MODE_BITSIZE (mode)
  571           && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
  572               || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
  573                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
  574 }
 575 \f
  576 /* Try to use instruction INSV to store VALUE into a field of OP0.
  577    BITSIZE and BITNUM are as for store_bit_field.

         Return true if the insertion instruction was expanded.  Return false
         if BITSIZE is zero or larger than the bit size of INSV's field mode,
         or if the instruction could not be expanded; in the latter case all
         insns emitted since entry are deleted again.  */
  578
  579 static bool
  580 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
  581                             unsigned HOST_WIDE_INT bitsize,
  582                             unsigned HOST_WIDE_INT bitnum,
  583                             rtx value)
  584 {
  585   struct expand_operand ops[4];
  586   rtx value1;
  587   rtx xop0 = op0;
        /* Remember the current end of the insn stream so that a failed attempt
           can be undone with delete_insns_since.  */
  588   rtx_insn *last = get_last_insn ();
  589   bool copy_back = false;
  590
  591   machine_mode op_mode = insv->field_mode;
  592   unsigned int unit = GET_MODE_BITSIZE (op_mode);
  593   if (bitsize == 0 || bitsize > unit)
  594     return false;
  595
  596   if (MEM_P (xop0))
  597     /* Get a reference to the first byte of the field.  */
  598     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
  599                                  &bitnum);
  600   else
  601     {
  602       /* Convert from counting within OP0 to counting in OP_MODE.  */
  603       if (BYTES_BIG_ENDIAN)
  604         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
  605
  606       /* If xop0 is a register, we need it in OP_MODE
  607          to make it acceptable to the format of insv.  */
  608       if (GET_CODE (xop0) == SUBREG)
  609         /* We can't just change the mode, because this might clobber op0,
  610            and we will need the original value of op0 if insv fails.  */
  611         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
  612       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
  613         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
  614     }
  615
  616   /* If the destination is a paradoxical subreg such that we need a
  617      truncate to the inner mode, perform the insertion on a temporary and
  618      truncate the result to the original destination.  Note that we can't
  619      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
  620      X) 0)) is (reg:N X).  */
  621   if (GET_CODE (xop0) == SUBREG
  622       && REG_P (SUBREG_REG (xop0))
  623       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
  624                                          op_mode))
  625     {
  626       rtx tem = gen_reg_rtx (op_mode);
  627       emit_move_insn (tem, xop0);
  628       xop0 = tem;
  629       copy_back = true;
  630     }
  631
  632   /* There is a similar overflow check at the start of store_bit_field_1,
  633      but that only checks the situation where the field lies completely
  634      outside the register, while there are situations where the field
  635      lies partially in the register; we need to adjust bitsize for this
  636      partial overflow situation.  Without this fix, pr48335-2.c on big-endian
  637      will be broken on those architectures that support bit insert
  638      instructions, like arm, aarch64 etc.  */
  639   if (bitsize + bitnum > unit && bitnum < unit)
  640     {
  641       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
  642                "destination object, data truncated into %wu-bit",
  643                bitsize, unit - bitnum);
  644       bitsize = unit - bitnum;
  645     }
  646
  647   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
  648      "backwards" from the size of the unit we are inserting into.
  649      Otherwise, we count bits from the most significant on a
  650      BYTES/BITS_BIG_ENDIAN machine.  */
  651
  652   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
  653     bitnum = unit - bitsize - bitnum;
  654
  655   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
  656   value1 = value;
  657   if (GET_MODE (value) != op_mode)
  658     {
  659       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
  660         {
  661           /* Optimization: Don't bother really extending VALUE
  662              if it has all the bits we will actually use.  However,
  663              if we must narrow it, be sure we do it correctly.  */
  664
  665           if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
  666             {
  667               rtx tmp;
  668
                    /* Prefer a subreg that simplifies directly; otherwise force
                       VALUE into a register and take a subreg of that.  */
  669               tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
  670               if (! tmp)
  671                 tmp = simplify_gen_subreg (op_mode,
  672                                            force_reg (GET_MODE (value),
  673                                                       value1),
  674                                            GET_MODE (value), 0);
  675               value1 = tmp;
  676             }
  677           else
  678             value1 = gen_lowpart (op_mode, value1);
  679         }
  680       else if (CONST_INT_P (value))
  681         value1 = gen_int_mode (INTVAL (value), op_mode);
  682       else
  683         /* Parse phase is supposed to make VALUE's data type
  684            match that of the component reference, which is a type
  685            at least as wide as the field; so VALUE should have
  686            a mode that corresponds to that type.  */
  687         gcc_assert (CONSTANT_P (value));
  688     }
  689
        /* Operands of the insertion pattern: destination, field size, field
           position, and the value to insert.  */
  690   create_fixed_operand (&ops[0], xop0);
  691   create_integer_operand (&ops[1], bitsize);
  692   create_integer_operand (&ops[2], bitnum);
  693   create_input_operand (&ops[3], value1, op_mode);
  694   if (maybe_expand_insn (insv->icode, 4, ops))
  695     {
            /* The insertion was done on a temporary; move the result back into
               the original destination.  */
  696       if (copy_back)
  697         convert_move (op0, xop0, true);
  698       return true;
  699     }
  700   delete_insns_since (last);
  701   return false;
  702 }
 703
 704 /* A subroutine of store_bit_field, with the same arguments.  Return true
 705    if the operation could be implemented.
 706
 707    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 708    no other way of implementing the operation.  If FALLBACK_P is false,
 709    return false instead.  */
 710
 711 static bool
 712 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 713                    unsigned HOST_WIDE_INT bitnum,
 714                    unsigned HOST_WIDE_INT bitregion_start,
 715                    unsigned HOST_WIDE_INT bitregion_end,
 716                    machine_mode fieldmode,
 717                    rtx value, bool reverse, bool fallback_p)
 718 {
 719   rtx op0 = str_rtx;
 720   rtx orig_value;
 721
 722   while (GET_CODE (op0) == SUBREG)
 723     {
 724       /* The following line once was done only if WORDS_BIG_ENDIAN,
 725          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 726          meaningful at a much higher level; when structures are copied
 727          between memory and regs, the higher-numbered regs
 728          always get higher addresses.  */
 729       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 730       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 731       int byte_offset = 0;
 732
 733       /* Paradoxical subregs need special handling on big-endian machines.  */
 734       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 735         {
 736           int difference = inner_mode_size - outer_mode_size;
 737
 738           if (WORDS_BIG_ENDIAN)
 739             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 740           if (BYTES_BIG_ENDIAN)
 741             byte_offset += difference % UNITS_PER_WORD;
 742         }
 743       else
 744         byte_offset = SUBREG_BYTE (op0);
 745
 746       bitnum += byte_offset * BITS_PER_UNIT;
 747       op0 = SUBREG_REG (op0);
 748     }
 749
 750   /* No action is needed if the target is a register and if the field
 751      lies completely outside that register.  This can occur if the source
 752      code contains an out-of-bounds access to a small array.  */
 753   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 754     return true;
 755
 756   /* Use vec_set patterns for inserting parts of vectors whenever
 757      available.  */
 758   if (VECTOR_MODE_P (GET_MODE (op0))
 759       && !MEM_P (op0)
 760       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 761       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 762       && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
 763       && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
 764     {
 765       struct expand_operand ops[3];
 766       machine_mode outermode = GET_MODE (op0);
 767       machine_mode innermode = GET_MODE_INNER (outermode);
 768       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 769       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 770
 771       create_fixed_operand (&ops[0], op0);
 772       create_input_operand (&ops[1], value, innermode);
 773       create_integer_operand (&ops[2], pos);
 774       if (maybe_expand_insn (icode, 3, ops))
 775         return true;
 776     }
 777
 778   /* If the target is a register, overwriting the entire object, or storing
 779      a full-word or multi-word field can be done with just a SUBREG.  */
 780   if (!MEM_P (op0)
 781       && bitsize == GET_MODE_BITSIZE (fieldmode)
 782       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 783           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 784     {
 785       /* Use the subreg machinery either to narrow OP0 to the required
 786          words or to cope with mode punning between equal-sized modes.
 787          In the latter case, use subreg on the rhs side, not lhs.  */
 788       rtx sub;
 789
 790       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 791         {
 792           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 793           if (sub)
 794             {
 795               if (reverse)
 796                 sub = flip_storage_order (GET_MODE (op0), sub);
 797               emit_move_insn (op0, sub);
 798               return true;
 799             }
 800         }
 801       else
 802         {
 803           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 804                                      bitnum / BITS_PER_UNIT);
 805           if (sub)
 806             {
 807               if (reverse)
 808                 value = flip_storage_order (fieldmode, value);
 809               emit_move_insn (sub, value);
 810               return true;
 811             }
 812         }
 813     }
 814
 815   /* If the target is memory, storing any naturally aligned field can be
 816      done with a simple store.  For targets that support fast unaligned
 817      memory, any naturally sized, unit aligned field can be done directly.  */
 818   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
 819     {
 820       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 821       if (reverse)
 822         value = flip_storage_order (fieldmode, value);
 823       emit_move_insn (op0, value);
 824       return true;
 825     }
 826
 827   /* Make sure we are playing with integral modes.  Pun with subregs
 828      if we aren't.  This must come after the entire register case above,
 829      since that case is valid for any mode.  The following cases are only
 830      valid for integral modes.  */
 831   {
 832     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 833     if (imode != GET_MODE (op0))
 834       {
 835         if (MEM_P (op0))
 836           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
 837         else
 838           {
 839             gcc_assert (imode != BLKmode);
 840             op0 = gen_lowpart (imode, op0);
 841           }
 842       }
 843   }
 844
 845   /* Storing an lsb-aligned field in a register
 846      can be done with a movstrict instruction.  */
 847
 848   if (!MEM_P (op0)
 849       && !reverse
 850       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 851       && bitsize == GET_MODE_BITSIZE (fieldmode)
 852       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 853     {
 854       struct expand_operand ops[2];
 855       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 856       rtx arg0 = op0;
 857       unsigned HOST_WIDE_INT subreg_off;
 858
 859       if (GET_CODE (arg0) == SUBREG)
 860         {
 861           /* Else we've got some float mode source being extracted into
 862              a different float mode destination -- this combination of
 863              subregs results in Severe Tire Damage.  */
 864           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 865                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 866                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 867           arg0 = SUBREG_REG (arg0);
 868         }
 869
 870       subreg_off = bitnum / BITS_PER_UNIT;
 871       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 872         {
 873           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 874
 875           create_fixed_operand (&ops[0], arg0);
 876           /* Shrink the source operand to FIELDMODE.  */
 877           create_convert_operand_to (&ops[1], value, fieldmode, false);
 878           if (maybe_expand_insn (icode, 2, ops))
 879             return true;
 880         }
 881     }
 882
 883   /* Handle fields bigger than a word.  */
 884
 885   if (bitsize > BITS_PER_WORD)
 886     {
 887       /* Here we transfer the words of the field
 888          in the order least significant first.
 889          This is because the most significant word is the one which may
 890          be less than full.
 891          However, only do that if the value is not BLKmode.  */
 892
 893       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 894       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 895       unsigned int i;
 896       rtx_insn *last;
 897
 898       /* This is the mode we must force value to, so that there will be enough
 899          subwords to extract.  Note that fieldmode will often (always?) be
 900          VOIDmode, because that is what store_field uses to indicate that this
 901          is a bit field, but passing VOIDmode to operand_subword_force
 902          is not allowed.  */
 903       fieldmode = GET_MODE (value);
 904       if (fieldmode == VOIDmode)
 905         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 906
 907       last = get_last_insn ();
 908       for (i = 0; i < nwords; i++)
 909         {
 910           /* If I is 0, use the low-order word in both field and target;
 911              if I is 1, use the next to lowest word; and so on.  */
 912           unsigned int wordnum = (backwards
 913                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 914                                   - i - 1
 915                                   : i);
 916           unsigned int bit_offset = (backwards ^ reverse
 917                                      ? MAX ((int) bitsize - ((int) i + 1)
 918                                             * BITS_PER_WORD,
 919                                             0)
 920                                      : (int) i * BITS_PER_WORD);
 921           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 922           unsigned HOST_WIDE_INT new_bitsize =
 923             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 924
 925           /* If the remaining chunk doesn't have full wordsize we have
 926              to make sure that for big-endian machines the higher order
 927              bits are used.  */
 928           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 929             value_word = simplify_expand_binop (word_mode, lshr_optab,
 930                                                 value_word,
 931                                                 GEN_INT (BITS_PER_WORD
 932                                                          - new_bitsize),
 933                                                 NULL_RTX, true,
 934                                                 OPTAB_LIB_WIDEN);
 935
 936           if (!store_bit_field_1 (op0, new_bitsize,
 937                                   bitnum + bit_offset,
 938                                   bitregion_start, bitregion_end,
 939                                   word_mode,
 940                                   value_word, reverse, fallback_p))
 941             {
 942               delete_insns_since (last);
 943               return false;
 944             }
 945         }
 946       return true;
 947     }
 948
 949   /* If VALUE has a floating-point or complex mode, access it as an
 950      integer of the corresponding size.  This can occur on a machine
 951      with 64 bit registers that uses SFmode for float.  It can also
 952      occur for unaligned float or complex fields.  */
 953   orig_value = value;
 954   if (GET_MODE (value) != VOIDmode
 955       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 956       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 957     {
 958       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 959       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 960     }
 961
 962   /* If OP0 is a multi-word register, narrow it to the affected word.
 963      If the region spans two words, defer to store_split_bit_field.  */
 964   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 965     {
 966       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 967                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 968       gcc_assert (op0);
 969       bitnum %= BITS_PER_WORD;
 970       if (bitnum + bitsize > BITS_PER_WORD)
 971         {
 972           if (!fallback_p)
 973             return false;
 974
 975           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 976                                  bitregion_end, value, reverse);
 977           return true;
 978         }
 979     }
 980
 981   /* From here on we can assume that the field to be stored in fits
 982      within a word.  If the destination is a register, it too fits
 983      in a word.  */
 984
 985   extraction_insn insv;
 986   if (!MEM_P (op0)
 987       && !reverse
 988       && get_best_reg_extraction_insn (&insv, EP_insv,
 989                                        GET_MODE_BITSIZE (GET_MODE (op0)),
 990                                        fieldmode)
 991       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 992     return true;
 993
 994   /* If OP0 is a memory, try copying it to a register and seeing if a
 995      cheap register alternative is available.  */
 996   if (MEM_P (op0) && !reverse)
 997     {
 998       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
 999                                         fieldmode)
1000           && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1001         return true;
1002
1003       rtx_insn *last = get_last_insn ();
1004
1005       /* Try loading part of OP0 into a register, inserting the bitfield
1006          into that, and then copying the result back to OP0.  */
1007       unsigned HOST_WIDE_INT bitpos;
1008       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1009                                                bitregion_start, bitregion_end,
1010                                                fieldmode, &bitpos);
1011       if (xop0)
1012         {
1013           rtx tempreg = copy_to_reg (xop0);
1014           if (store_bit_field_1 (tempreg, bitsize, bitpos,
1015                                  bitregion_start, bitregion_end,
1016                                  fieldmode, orig_value, reverse, false))
1017             {
1018               emit_move_insn (xop0, tempreg);
1019               return true;
1020             }
1021           delete_insns_since (last);
1022         }
1023     }
1024
1025   if (!fallback_p)
1026     return false;
1027
1028   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
1029                          bitregion_end, value, reverse);
1030   return true;
1031 }
1032
1033 /* Generate code to store value from rtx VALUE
1034    into a bit-field within structure STR_RTX
1035    containing BITSIZE bits starting at bit BITNUM.
1036
1037    BITREGION_START is bitpos of the first bitfield in this region.
1038    BITREGION_END is the bitpos of the ending bitfield in this region.
1039    These two fields are 0, if the C++ memory model does not apply,
1040    or we are not interested in keeping track of bitfield regions.
1041
1042    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1043
1044    If REVERSE is true, the store is to be done in reverse order.

        Nothing is returned.  Accesses accepted by strict_volatile_bitfield_p
        are expanded here as an access in FIELDMODE; everything else is passed
        to store_bit_field_1 with its fallback enabled, and a failure there is
        treated as unreachable.  */
1045
1046 void
1047 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1048                  unsigned HOST_WIDE_INT bitnum,
1049                  unsigned HOST_WIDE_INT bitregion_start,
1050                  unsigned HOST_WIDE_INT bitregion_end,
1051                  machine_mode fieldmode,
1052                  rtx value, bool reverse)
1053 {
1054   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1055   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
1056                                   bitregion_start, bitregion_end))
1057     {
1058       /* Storing a field that fills all of FIELDMODE can be done with a
1059          simple store.  We know here that the field can be accessed with
1060          one single instruction.  For targets that support unaligned
1061          memory, an unaligned access may be necessary.  */
1062       if (bitsize == GET_MODE_BITSIZE (fieldmode))
1063         {
1064           str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
1065                                              bitnum / BITS_PER_UNIT);
1066           if (reverse)
1067             value = flip_storage_order (fieldmode, value);
               /* The address above was formed from BITNUM / BITS_PER_UNIT, so
                  the field has to start on a byte boundary.  */
1068           gcc_assert (bitnum % BITS_PER_UNIT == 0);
1069           emit_move_insn (str_rtx, value);
1070         }
1071       else
1072         {
1073           rtx temp;
1074
               /* The field covers only part of a FIELDMODE unit: narrow the
                  reference to that unit (BITNUM becomes relative to it), load
                  the unit into a register, insert the field there and store
                  the whole unit back.  The insertion into TEMP is done with
                  both bit-region bounds passed as 0.  */
1075           str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
1076                                           &bitnum);
1077           gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
1078           temp = copy_to_reg (str_rtx);
1079           if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1080                                   fieldmode, value, reverse, true))
1081             gcc_unreachable ();
1082
1083           emit_move_insn (str_rtx, temp);
1084         }
1085
1086       return;
1087     }
1088
1089   /* Under the C++0x memory model, we must not touch bits outside the
1090      bit region.  Adjust the address to start at the beginning of the
1091      bit region.  */
1092   if (MEM_P (str_rtx) && bitregion_start > 0)
1093     {
1094       machine_mode bestmode;
1095       HOST_WIDE_INT offset, size;
1096
1097       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1098
           /* OFFSET is the byte offset of the region start.  BITNUM and
              BITREGION_END are rebased to be relative to that start, and
              SIZE is the number of bytes from the region start to the end
              of the field, rounded up.  */
1099       offset = bitregion_start / BITS_PER_UNIT;
1100       bitnum -= bitregion_start;
1101       size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1102       bitregion_end -= bitregion_start;
1103       bitregion_start = 0;
1104       bestmode = get_best_mode (bitsize, bitnum,
1105                                 bitregion_start, bitregion_end,
1106                                 MEM_ALIGN (str_rtx), VOIDmode,
1107                                 MEM_VOLATILE_P (str_rtx));
1108       str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1109     }
1110
       /* The last argument enables the fallback path of store_bit_field_1,
          so a false return is not expected here.  */
1111   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1112                           bitregion_start, bitregion_end,
1113                           fieldmode, value, reverse, true))
1114     gcc_unreachable ();
1115 }
1116 \f
1117 /* Use shifts and boolean operations to store VALUE into a bit field of
1118    width BITSIZE in OP0, starting at bit BITNUM.
1119
1120    If REVERSE is true, the store is to be done in reverse order.

        BITREGION_START and BITREGION_END are only forwarded: to get_best_mode
        when an access mode is chosen for a MEM, and to store_split_bit_field.

        When OP0 is a MEM, an access mode is chosen first and OP0 is narrowed
        to it; if no mode can be found the store is handed to
        store_split_bit_field instead.  Any other OP0 is passed straight to
        store_fixed_bit_field_1.  */
1121
1122 static void
1123 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1124                        unsigned HOST_WIDE_INT bitnum,
1125                        unsigned HOST_WIDE_INT bitregion_start,
1126                        unsigned HOST_WIDE_INT bitregion_end,
1127                        rtx value, bool reverse)
1128 {
1129   /* There is a case not handled here:
1130      a structure with a known alignment of just a halfword
1131      and a field split across two aligned halfwords within the structure.
1132      Or likewise a structure with a known alignment of just a byte
1133      and a field split across two bytes.
1134      Such cases are not supposed to be able to occur.  */
1135
1136   if (MEM_P (op0))
1137     {
           /* Start from OP0's own mode, but use word_mode instead when that
              mode has no size or is wider than a word.  The result is handed
              to get_best_mode as its mode argument (presumably an upper bound
              on the mode it may choose -- confirm against get_best_mode).  */
1138       machine_mode mode = GET_MODE (op0);
1139       if (GET_MODE_BITSIZE (mode) == 0
1140           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1141         mode = word_mode;
1142       mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1143                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1144
1145       if (mode == VOIDmode)
1146         {
1147           /* The only way this should occur is if the field spans word
1148              boundaries.  */
1149           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1150                                  bitregion_end, value, reverse);
1151           return;
1152         }
1153
           /* Re-address OP0 in MODE; BITNUM is updated to be relative to the
              narrowed reference.  */
1154       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1155     }
1156
1157   store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
1158 }
1159
1160 /* Helper function for store_fixed_bit_field, stores
1161    the bit field always using the MODE of OP0.

        OP0 must have a scalar integer mode (asserted below).  BITSIZE, BITNUM,
        VALUE and REVERSE are as for store_fixed_bit_field.  The store is
        expanded as: position VALUE at the field's offset, clear the field's
        bits in a register copy of OP0, IOR the two together and move the
        result back into OP0.  */
1162
1163 static void
1164 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1165                          unsigned HOST_WIDE_INT bitnum,
1166                          rtx value, bool reverse)
1167 {
1168   machine_mode mode;
1169   rtx temp;
       /* Set when VALUE is a CONST_INT whose low BITSIZE bits are all zero,
          respectively all one.  They let the IOR, respectively the clearing
          AND, be skipped further down.  */
1170   int all_zero = 0;
1171   int all_one = 0;
1172
1173   mode = GET_MODE (op0);
1174   gcc_assert (SCALAR_INT_MODE_P (mode));
1175
1176   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1177      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1178
1179   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1180     /* BITNUM is the distance between our msb
1181        and that of the containing datum.
1182        Convert it to the distance from the lsb.  */
1183     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1184
1185   /* Now BITNUM is always the distance between our lsb
1186      and that of OP0.  */
1187
1188   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1189      we must first convert its mode to MODE.  */
1190
1191   if (CONST_INT_P (value))
1192     {
1193       unsigned HOST_WIDE_INT v = UINTVAL (value);
1194
           /* Keep only the low BITSIZE bits of the constant.  The guard keeps
              the shift count below the width of V.  */
1195       if (bitsize < HOST_BITS_PER_WIDE_INT)
1196         v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;
1197
1198       if (v == 0)
1199         all_zero = 1;
1200       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1201                 && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
1202                || (bitsize == HOST_BITS_PER_WIDE_INT
1203                    && v == (unsigned HOST_WIDE_INT) -1))
1204         all_one = 1;
1205
1206       value = lshift_value (mode, v, bitnum);
1207     }
1208   else
1209     {
           /* This has to be decided from VALUE's own mode, before VALUE is
              converted to MODE below.  No masking is done when VALUE's mode
              is exactly BITSIZE bits wide, or when the field ends at the top
              bit of MODE (presumably because the excess bits are then
              shifted out of MODE by the left shift).  */
1210       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1211                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1212
1213       if (GET_MODE (value) != mode)
1214         value = convert_to_mode (mode, value, 1);
1215
1216       if (must_and)
1217         value = expand_binop (mode, and_optab, value,
1218                               mask_rtx (mode, 0, bitsize, 0),
1219                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1220       if (bitnum > 0)
1221         value = expand_shift (LSHIFT_EXPR, mode, value,
1222                               bitnum, NULL_RTX, 1);
1223     }
1224
1225   if (reverse)
1226     value = flip_storage_order (mode, value);
1227
1228   /* Now clear the chosen bits in OP0,
1229      except that if VALUE is -1 we need not bother.  */
1230   /* We keep the intermediates in registers to allow CSE to combine
1231      consecutive bitfield assignments.  */
1232
1233   temp = force_reg (mode, op0);
1234
1235   if (! all_one)
1236     {
           /* The mask is built for the same BITNUM/BITSIZE as the field and,
              like VALUE, is flipped when the store is in reverse order.  */
1237       rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1238       if (reverse)
1239         mask = flip_storage_order (mode, mask);
1240       temp = expand_binop (mode, and_optab, temp, mask,
1241                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1242       temp = force_reg (mode, temp);
1243     }
1244
1245   /* Now logical-or VALUE into OP0, unless it is zero.  */
1246
1247   if (! all_zero)
1248     {
1249       temp = expand_binop (mode, ior_optab, temp, value,
1250                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1251       temp = force_reg (mode, temp);
1252     }
1253
       /* Write the combined value back, using a copy of OP0's rtx as the
          destination of the move.  */
1254   if (op0 != temp)
1255     {
1256       op0 = copy_rtx (op0);
1257       emit_move_insn (op0, temp);
1258     }
1259 }
1260 \f
1261 /* Store a bit field that is split across multiple accessible memory objects.
1262
1263    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1264    BITSIZE is the field width; BITPOS the position of its first bit
1265    (within the word).
1266    VALUE is the value to store.
1267
1268    If REVERSE is true, the store is to be done in reverse order.
1269
1270    This does not yet handle fields wider than BITS_PER_WORD.

        BITREGION_START and BITREGION_END are forwarded to
        store_fixed_bit_field; a nonzero BITREGION_END additionally limits
        the size of the unit that is touched when OP0 is not a register.

        The field is stored piecewise: each iteration of the loop below takes
        THISSIZE bits that lie inside a single UNIT-bit chunk, fetches the
        matching bits of VALUE into PART and stores them with
        store_fixed_bit_field.  */
1271
1272 static void
1273 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1274                        unsigned HOST_WIDE_INT bitpos,
1275                        unsigned HOST_WIDE_INT bitregion_start,
1276                        unsigned HOST_WIDE_INT bitregion_end,
1277                        rtx value, bool reverse)
1278 {
       /* UNIT is the chunk size in bits, TOTAL_BITS the width of VALUE's
          mode, and BITSDONE the number of field bits stored so far.  */
1279   unsigned int unit, total_bits, bitsdone = 0;
1280
1281   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1282      much at a time.  */
1283   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1284     unit = BITS_PER_WORD;
1285   else
1286     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1287
1288   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1289      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1290      again, and we will mutually recurse forever.  */
1291   if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1292     unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1293
1294   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1295      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1296      that VALUE might be a floating-point constant.  */
1297   if (CONSTANT_P (value) && !CONST_INT_P (value))
1298     {
1299       rtx word = gen_lowpart_common (word_mode, value);
1300
1301       if (word && (value != word))
1302         value = word;
1303       else
1304         value = gen_lowpart_common (word_mode,
1305                                     force_reg (GET_MODE (value) != VOIDmode
1306                                                ? GET_MODE (value)
1307                                                : word_mode, value));
1308     }
1309
1310   total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1311
1312   while (bitsdone < bitsize)
1313     {
1314       unsigned HOST_WIDE_INT thissize;
1315       unsigned HOST_WIDE_INT thispos;
1316       unsigned HOST_WIDE_INT offset;
1317       rtx part, word;
1318
           /* OFFSET counts the whole UNITs that precede the next bit to be
              stored; THISPOS is the position of that bit inside its UNIT.  */
1319       offset = (bitpos + bitsdone) / unit;
1320       thispos = (bitpos + bitsdone) % unit;
1321
1322       /* When region of bytes we can touch is restricted, decrease
1323          UNIT close to the end of the region as needed.  If op0 is a REG
1324          or SUBREG of REG, don't do this, as there can't be data races
1325          on a register and we can expand shorter code in some cases.  */
1326       if (bitregion_end
1327           && unit > BITS_PER_UNIT
1328           && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1329           && !REG_P (op0)
1330           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1331         {
               /* Retry this iteration with a smaller UNIT; nothing has been
                  stored yet, so BITSDONE is left untouched.  */
1332           unit = unit / 2;
1333           continue;
1334         }
1335
1336       /* THISSIZE must not overrun a word boundary.  Otherwise,
1337          store_fixed_bit_field will call us again, and we will mutually
1338          recurse forever.  */
1339       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1340       thissize = MIN (thissize, unit - thispos);
1341
1342       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1343         {
1344           /* Fetch successively less significant portions.  */
1345           if (CONST_INT_P (value))
1346             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1347                              >> (bitsize - bitsdone - thissize))
1348                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1349           /* Likewise, but the source is little-endian.  */
1350           else if (reverse)
1351             part = extract_fixed_bit_field (word_mode, value, thissize,
1352                                             bitsize - bitsdone - thissize,
1353                                             NULL_RTX, 1, false);
1354           else
1355             {
                   /* NOTE(review): this declaration shadows the function-level
                      TOTAL_BITS, which was set from the same expression before
                      the loop; VALUE is not reassigned inside the loop.  */
1356               int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1357               /* The args are chosen so that the last part includes the
1358                  lsb.  Give extract_bit_field the value it needs (with
1359                  endianness compensation) to fetch the piece we want.  */
1360               part = extract_fixed_bit_field (word_mode, value, thissize,
1361                                               total_bits - bitsize + bitsdone,
1362                                               NULL_RTX, 1, false);
1363             }
1364         }
1365       else
1366         {
1367           /* Fetch successively more significant portions.  */
1368           if (CONST_INT_P (value))
1369             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1370                              >> bitsdone)
1371                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1372           /* Likewise, but the source is big-endian.  */
1373           else if (reverse)
1374             part = extract_fixed_bit_field (word_mode, value, thissize,
1375                                             total_bits - bitsdone - thissize,
1376                                             NULL_RTX, 1, false);
1377           else
1378             part = extract_fixed_bit_field (word_mode, value, thissize,
1379                                             bitsdone, NULL_RTX, 1, false);
1380         }
1381
1382       /* If OP0 is a register, then handle OFFSET here.
1383
1384          When handling multiword bitfields, extract_bit_field may pass
1385          down a word_mode SUBREG of a larger REG for a bitfield that actually
1386          crosses a word boundary.  Thus, for a SUBREG, we must find
1387          the current word starting from the base register.  */
1388       if (GET_CODE (op0) == SUBREG)
1389         {
1390           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1391                             + (offset * unit / BITS_PER_WORD);
1392           machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
               /* An inner register narrower than a word has only word 0; any
                  other word index is marked with const0_rtx so that the
                  store is skipped below.  */
1393           if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1394             word = word_offset ? const0_rtx : op0;
1395           else
1396             word = operand_subword_force (SUBREG_REG (op0), word_offset,
1397                                           GET_MODE (SUBREG_REG (op0)));
               /* WORD already accounts for the whole words in OFFSET; keep
                  only the part of OFFSET that lies within a word (assumes
                  BITS_PER_WORD / UNIT is a power of two -- TODO confirm).  */
1398           offset &= BITS_PER_WORD / unit - 1;
1399         }
1400       else if (REG_P (op0))
1401         {
1402           machine_mode op0_mode = GET_MODE (op0);
               /* Same handling as the SUBREG case above, but indexing words
                  of OP0 itself.  */
1403           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1404             word = offset ? const0_rtx : op0;
1405           else
1406             word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1407                                           GET_MODE (op0));
1408           offset &= BITS_PER_WORD / unit - 1;
1409         }
1410       else
1411         word = op0;
1412
1413       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1414          it is just an out-of-bounds access.  Ignore it.  */
1415       if (word != const0_rtx)
1416         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1417                                bitregion_start, bitregion_end, part,
1418                                reverse);
1419       bitsdone += thissize;
1420     }
1421 }
1422 \f
1423 /* A subroutine of extract_bit_field_1 that converts return value X
1424    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1425    to extract_bit_field.

        X is returned unchanged when it already has mode TMODE or MODE;
        otherwise it is converted to TMODE.  */
1426
1427 static rtx
1428 convert_extracted_bit_field (rtx x, machine_mode mode,
1429                              machine_mode tmode, bool unsignedp)
1430 {
1431   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1432     return x;
1433
1434   /* If TMODE is not a scalar integral mode, first convert X to the
1435      integer mode of that size, force it into a register, and then
1436      access it in TMODE via gen_lowpart.  */
1437   if (!SCALAR_INT_MODE_P (tmode))
1438     {
1439       machine_mode smode;
1440
           /* SMODE is the MODE_INT mode requested for TMODE's bit size.  */
1441       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1442       x = convert_to_mode (smode, x, unsignedp);
1443       x = force_reg (smode, x);
1444       return gen_lowpart (tmode, x);
1445     }
1446
1447   return convert_to_mode (tmode, x, unsignedp);
1448 }
1449
1450 /* Try to use an ext(z)v pattern to extract a field from OP0.
1451    Return the extracted value on success, otherwise return null.
1452    EXTV describes the pattern: its field_mode is the mode of the
1453    extraction (EXT_MODE below) and its struct_mode is used to address
        a MEM operand.  The other arguments are as for extract_bit_field.

        Null is returned when BITSIZE is zero or larger than EXT_MODE, when
        OP0 is a SUBREG whose mode is not EXT_MODE, or when the pattern
        cannot be expanded.  */
1454
1455 static rtx
1456 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1457                               unsigned HOST_WIDE_INT bitsize,
1458                               unsigned HOST_WIDE_INT bitnum,
1459                               int unsignedp, rtx target,
1460                               machine_mode mode, machine_mode tmode)
1461 {
1462   struct expand_operand ops[4];
       /* SPEC_TARGET remembers the target given by the caller (or the fresh
          TMODE register allocated below).  SPEC_TARGET_SUBREG is set when
          TARGET is replaced by a wider EXT_MODE lowpart of SPEC_TARGET.  */
1463   rtx spec_target = target;
1464   rtx spec_target_subreg = 0;
1465   machine_mode ext_mode = extv->field_mode;
1466   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1467
       /* Give up on an empty field or one wider than EXT_MODE.  */
1468   if (bitsize == 0 || unit < bitsize)
1469     return NULL_RTX;
1470
1471   if (MEM_P (op0))
1472     /* Get a reference to the first byte of the field.  */
1473     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1474                                 &bitnum);
1475   else
1476     {
1477       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1478       if (BYTES_BIG_ENDIAN)
1479         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1480
1481       /* If op0 is a register, we need it in EXT_MODE to make it
1482          acceptable to the format of ext(z)v.  */
1483       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1484         return NULL_RTX;
1485       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1486         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1487     }
1488
1489   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1490      "backwards" from the size of the unit we are extracting from.
1491      Otherwise, we count bits from the most significant on a
1492      BYTES/BITS_BIG_ENDIAN machine.  */
1493
1494   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1495     bitnum = unit - bitsize - bitnum;
1496
1497   if (target == 0)
1498     target = spec_target = gen_reg_rtx (tmode);
1499
1500   if (GET_MODE (target) != ext_mode)
1501     {
1502       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1503          between the mode of the extraction (word_mode) and the target
1504          mode.  Instead, create a temporary and use convert_move to set
1505          the target.  */
1506       if (REG_P (target)
1507           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1508         {
1509           target = gen_lowpart (ext_mode, target);
1510           if (GET_MODE_PRECISION (ext_mode)
1511               > GET_MODE_PRECISION (GET_MODE (spec_target)))
1512             spec_target_subreg = target;
1513         }
1514       else
1515         target = gen_reg_rtx (ext_mode);
1516     }
1517
       /* Operands of the pattern, in order: destination, source, field
          width, field position.  */
1518   create_output_operand (&ops[0], target, ext_mode);
1519   create_fixed_operand (&ops[1], op0);
1520   create_integer_operand (&ops[2], bitsize);
1521   create_integer_operand (&ops[3], bitnum);
1522   if (maybe_expand_insn (extv->icode, 4, ops))
1523     {
           /* Read the destination back from operand 0.  If it is SPEC_TARGET
              itself, or the EXT_MODE lowpart made from it above, SPEC_TARGET
              is returned as is; any other destination is converted with
              convert_extracted_bit_field.  */
1524       target = ops[0].value;
1525       if (target == spec_target)
1526         return target;
1527       if (target == spec_target_subreg)
1528         return spec_target;
1529       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1530     }
1531   return NULL_RTX;
1532 }
1533
1534 /* A subroutine of extract_bit_field, with the same arguments.
1535    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1536    if we can find no other means of implementing the operation.
1537    If FALLBACK_P is false, return NULL instead.

        The following strategies are tried in order, and the first one
        that succeeds supplies the return value:
          - an out-of-bounds read of a register yields a fresh pseudo of
            mode TMODE;
          - a read of a whole register returns the register itself
            (passed through flip_storage_order if REVERSE);
          - vec_extract patterns, for vector values not in memory;
          - a subreg (not in memory) or a plain load (in memory) when a
            full MODE1 value is wanted;
          - word-by-word recursion for fields wider than a word;
          - ext(z)v patterns, applied to the register, to the memory, or
            to part of the memory copied into a register;
          - shifts and masks via extract_fixed_bit_field, or
            extract_split_bit_field when a field in a multi-word register
            spans two words.  */
1538
1539 static rtx
1540 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1541                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1542                      machine_mode mode, machine_mode tmode,
1543                      bool reverse, bool fallback_p)
1544 {
1545   rtx op0 = str_rtx;
1546   machine_mode int_mode;
1547   machine_mode mode1;
1548
1549   if (tmode == VOIDmode)
1550     tmode = mode;
1551
       /* Look through any SUBREGs, folding their byte offsets into BITNUM.  */
1552   while (GET_CODE (op0) == SUBREG)
1553     {
1554       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1555       op0 = SUBREG_REG (op0);
1556     }
1557
1558   /* If we have an out-of-bounds access to a register, just return an
1559      uninitialized register of the required mode.  This can occur if the
1560      source code contains an out-of-bounds access to a small array.  */
1561   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1562     return gen_reg_rtx (tmode);
1563
1564   if (REG_P (op0)
1565       && mode == GET_MODE (op0)
1566       && bitnum == 0
1567       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1568     {
1569       if (reverse)
1570         op0 = flip_storage_order (mode, op0);
1571       /* We're trying to extract a full register from itself.  */
1572       return op0;
1573     }
1574
1575   /* See if we can get a better vector mode before extracting.  */
1576   if (VECTOR_MODE_P (GET_MODE (op0))
1577       && !MEM_P (op0)
1578       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1579     {
1580       machine_mode new_mode;
1581
1582       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1583         new_mode = MIN_MODE_VECTOR_FLOAT;
1584       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1585         new_mode = MIN_MODE_VECTOR_FRACT;
1586       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1587         new_mode = MIN_MODE_VECTOR_UFRACT;
1588       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1589         new_mode = MIN_MODE_VECTOR_ACCUM;
1590       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1591         new_mode = MIN_MODE_VECTOR_UACCUM;
1592       else
1593         new_mode = MIN_MODE_VECTOR_INT;
1594
1595       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1596         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1597             && targetm.vector_mode_supported_p (new_mode))
1598           break;
1599       if (new_mode != VOIDmode)
1600         op0 = gen_lowpart (new_mode, op0);
1601     }
1602
1603   /* Use vec_extract patterns for extracting parts of vectors whenever
1604      available.  The last condition requires the field to lie within a
          single vector element.  */
1605   if (VECTOR_MODE_P (GET_MODE (op0))
1606       && !MEM_P (op0)
1607       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1608       && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1609           == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1610     {
1611       struct expand_operand ops[3];
1612       machine_mode outermode = GET_MODE (op0);
1613       machine_mode innermode = GET_MODE_INNER (outermode);
1614       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1615       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1616
1617       create_output_operand (&ops[0], target, innermode);
1618       create_input_operand (&ops[1], op0, outermode);
1619       create_integer_operand (&ops[2], pos);
1620       if (maybe_expand_insn (icode, 3, ops))
1621         {
1622           target = ops[0].value;
1623           if (GET_MODE (target) != mode)
1624             return gen_lowpart (tmode, target);
1625           return target;
1626         }
1627     }
1628
1629   /* Make sure we are playing with integral modes.  Pun with subregs
1630      if we aren't.  */
1631   {
1632     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1633     if (imode != GET_MODE (op0))
1634       {
1635         if (MEM_P (op0))
1636           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1637         else if (imode != BLKmode)
1638           {
1639             op0 = gen_lowpart (imode, op0);
1640
1641             /* If we got a SUBREG, force it into a register since we
1642                aren't going to be able to do another SUBREG on it.  */
1643             if (GET_CODE (op0) == SUBREG)
1644               op0 = force_reg (imode, op0);
1645           }
1646         else if (REG_P (op0))
1647           {
                 /* No integer mode has exactly the size of OP0.  Copy OP0
                    into the low part of a register of the smallest integer
                    mode that is large enough, and adjust BITNUM by the
                    byte offset of that low part.  */
1648             rtx reg, subreg;
1649             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1650                                             MODE_INT);
1651             reg = gen_reg_rtx (imode);
1652             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1653             emit_move_insn (subreg, op0);
1654             op0 = reg;
1655             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1656           }
1657         else
1658           {
                 /* Neither a MEM nor a REG: spill OP0 to a stack temporary
                    and treat it as a BLKmode memory reference.  */
1659             HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1660             rtx mem = assign_stack_temp (GET_MODE (op0), size);
1661             emit_move_insn (mem, op0);
1662             op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1663           }
1664       }
1665   }
1666
1667   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1668      If that's wrong, the solution is to test for it and set TARGET to 0
1669      if needed.  */
1670
1671   /* Get the mode of the field to use for atomic access or subreg
1672      conversion.  */
1673   mode1 = mode;
1674   if (SCALAR_INT_MODE_P (tmode))
1675     {
1676       machine_mode try_mode = mode_for_size (bitsize,
1677                                                   GET_MODE_CLASS (tmode), 0);
1678       if (try_mode != BLKmode)
1679         mode1 = try_mode;
1680     }
1681   gcc_assert (mode1 != BLKmode);
1682
1683   /* Extraction of a full MODE1 value can be done with a subreg as long
1684      as the least significant bit of the value is the least significant
1685      bit of either OP0 or a word of OP0.  */
1686   if (!MEM_P (op0)
1687       && !reverse
1688       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1689       && bitsize == GET_MODE_BITSIZE (mode1)
1690       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1691     {
1692       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1693                                      bitnum / BITS_PER_UNIT);
1694       if (sub)
1695         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1696     }
1697
1698   /* Extraction of a full MODE1 value can be done with a load as long as
1699      the field is on a byte boundary and is sufficiently aligned.  */
1700   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1701     {
1702       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1703       if (reverse)
1704         op0 = flip_storage_order (mode1, op0);
1705       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1706     }
1707
1708   /* Handle fields bigger than a word.  */
1709
1710   if (bitsize > BITS_PER_WORD)
1711     {
1712       /* Here we transfer the words of the field
1713          in the order least significant first.
1714          This is because the most significant word is the one which may
1715          be less than full.  */
1716
1717       const bool backwards = WORDS_BIG_ENDIAN;
1718       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1719       unsigned int i;
1720       rtx_insn *last;
1721
1722       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1723         target = gen_reg_rtx (mode);
1724
1725       /* In case we're about to clobber a base register or something
1726          (see gcc.c-torture/execute/20040625-1.c).   */
1727       if (reg_mentioned_p (target, str_rtx))
1728         target = gen_reg_rtx (mode);
1729
1730       /* Indicate for flow that the entire target reg is being set.  */
1731       emit_clobber (target);
1732
           /* Remember where we are so that the insns emitted below can be
              deleted if one of the words cannot be extracted.  */
1733       last = get_last_insn ();
1734       for (i = 0; i < nwords; i++)
1735         {
1736           /* If I is 0, use the low-order word in both field and target;
1737              if I is 1, use the next to lowest word; and so on.  */
1738           /* Word number in TARGET to use.  */
1739           unsigned int wordnum
1740             = (backwards
1741                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1742                : i);
1743           /* Offset from start of field in OP0.  */
1744           unsigned int bit_offset = (backwards ^ reverse
1745                                      ? MAX ((int) bitsize - ((int) i + 1)
1746                                             * BITS_PER_WORD,
1747                                             0)
1748                                      : (int) i * BITS_PER_WORD);
1749           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1750           rtx result_part
1751             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1752                                              bitsize - i * BITS_PER_WORD),
1753                                    bitnum + bit_offset, 1, target_part,
1754                                    mode, word_mode, reverse, fallback_p);
1755
1756           gcc_assert (target_part);
1757           if (!result_part)
1758             {
1759               delete_insns_since (last);
1760               return NULL;
1761             }
1762
1763           if (result_part != target_part)
1764             emit_move_insn (target_part, result_part);
1765         }
1766
1767       if (unsignedp)
1768         {
1769           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1770              need to be zero'd out.  */
1771           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1772             {
1773               unsigned int i, total_words;
1774
1775               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1776               for (i = nwords; i < total_words; i++)
1777                 emit_move_insn
1778                   (operand_subword (target,
1779                                     backwards ? total_words - i - 1 : i,
1780                                     1, VOIDmode),
1781                    const0_rtx);
1782             }
1783           return target;
1784         }
1785
1786       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1787       target = expand_shift (LSHIFT_EXPR, mode, target,
1788                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1789       return expand_shift (RSHIFT_EXPR, mode, target,
1790                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1791     }
1792
1793   /* If OP0 is a multi-word register, narrow it to the affected word.
1794      If the region spans two words, defer to extract_split_bit_field.  */
1795   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1796     {
1797       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1798                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1799       bitnum %= BITS_PER_WORD;
1800       if (bitnum + bitsize > BITS_PER_WORD)
1801         {
1802           if (!fallback_p)
1803             return NULL_RTX;
1804           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1805                                             reverse);
1806           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1807         }
1808     }
1809
1810   /* From here on we know the desired field is smaller than a word.
1811      If OP0 is a register, it too fits within a word.  */
1812   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1813   extraction_insn extv;
1814   if (!MEM_P (op0)
1815       && !reverse
1816       /* ??? We could limit the structure size to the part of OP0 that
1817          contains the field, with appropriate checks for endianness
1818          and TRULY_NOOP_TRUNCATION.  */
1819       && get_best_reg_extraction_insn (&extv, pattern,
1820                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1821                                        tmode))
1822     {
1823       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1824                                                  unsignedp, target, mode,
1825                                                  tmode);
1826       if (result)
1827         return result;
1828     }
1829
1830   /* If OP0 is a memory, try copying it to a register and seeing if a
1831      cheap register alternative is available.  */
1832   if (MEM_P (op0) && !reverse)
1833     {
1834       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1835                                         tmode))
1836         {
1837           rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1838                                                      bitnum, unsignedp,
1839                                                      target, mode,
1840                                                      tmode);
1841           if (result)
1842             return result;
1843         }
1844
1845       rtx_insn *last = get_last_insn ();
1846
1847       /* Try loading part of OP0 into a register and extracting the
1848          bitfield from that.  */
1849       unsigned HOST_WIDE_INT bitpos;
1850       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1851                                                0, 0, tmode, &bitpos);
1852       if (xop0)
1853         {
1854           xop0 = copy_to_reg (xop0);
               /* Recurse with FALLBACK_P false: on failure we get a null
                  result back and discard the insns emitted since LAST.  */
1855           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1856                                             unsignedp, target,
1857                                             mode, tmode, reverse, false);
1858           if (result)
1859             return result;
1860           delete_insns_since (last);
1861         }
1862     }
1863
1864   if (!fallback_p)
1865     return NULL;
1866
1867   /* Find a correspondingly-sized integer field, so we can apply
1868      shifts and masks to it.  */
1869   int_mode = int_mode_for_mode (tmode);
1870   if (int_mode == BLKmode)
1871     int_mode = int_mode_for_mode (mode);
1872   /* Should probably push op0 out to memory and then do a load.  */
1873   gcc_assert (int_mode != BLKmode);
1874
1875   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target,
1876                                     unsignedp, reverse);
1877
1878   /* Complex values must be reversed piecewise, so we need to undo the global
1879      reversal, convert to the complex mode and reverse again.  */
1880   if (reverse && COMPLEX_MODE_P (tmode))
1881     {
1882       target = flip_storage_order (int_mode, target);
1883       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1884       target = flip_storage_order (tmode, target);
1885     }
1886   else
1887     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1888
1889   return target;
1890 }
1891
1892 /* Generate code to extract a bit-field from STR_RTX
1893    containing BITSIZE bits, starting at BITNUM,
1894    and put it in TARGET if possible (if TARGET is nonzero).
1895    Regardless of TARGET, we return the rtx for where the value is placed.
1896
1897    STR_RTX is the structure containing the field (a REG or MEM).
1898    UNSIGNEDP is nonzero if this is an unsigned bit field.
1899    MODE is the natural mode of the field value once extracted.
1900    TMODE is the mode the caller would like the value to have;
1901    but the value may be returned with type MODE instead.
1902
1903    If REVERSE is true, the extraction is to be done in reverse order.
1904
1905    If a TARGET is specified and we can store in it at no extra cost,
1906    we do so, and return TARGET.
1907    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1908    if they are equally easy.

        Accesses for which strict_volatile_bitfield_p holds are dealt with
        here; everything else is handed to extract_bit_field_1 with its
        FALLBACK_P argument true.  */
1909
1910 rtx
1911 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1912                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1913                    machine_mode mode, machine_mode tmode, bool reverse)
1914 {
1915   machine_mode mode1;
1916
1917   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
       /* MODE1 is the mode of STR_RTX if that mode has a nonzero size,
          else the mode of TARGET under the same condition, else TMODE.  */
1918   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1919     mode1 = GET_MODE (str_rtx);
1920   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1921     mode1 = GET_MODE (target);
1922   else
1923     mode1 = tmode;
1924
1925   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1926     {
1927       /* Extraction of a full MODE1 value can be done with a simple load.
1928          We know here that the field can be accessed with one single
1929          instruction.  For targets that support unaligned memory,
1930          an unaligned access may be necessary.  */
1931       if (bitsize == GET_MODE_BITSIZE (mode1))
1932         {
1933           rtx result = adjust_bitfield_address (str_rtx, mode1,
1934                                                 bitnum / BITS_PER_UNIT);
1935           if (reverse)
1936             result = flip_storage_order (mode1, result);
1937           gcc_assert (bitnum % BITS_PER_UNIT == 0);
1938           return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1939         }
1940
           /* The field is only part of a MODE1 value.  narrow_bit_field_mem
              rewrites STR_RTX and BITNUM; the assertion checks that the field
              then fits within MODE1.  The value is loaded into a register and
              the field is extracted from that register below.  */
1941       str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1942                                       &bitnum);
1943       gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1944       str_rtx = copy_to_reg (str_rtx);
1945     }
1946
1947   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1948                               target, mode, tmode, reverse, true);
1949 }
1950 \f
1951 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1952    from bit BITNUM of OP0.
1953
1954    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1955    If REVERSE is true, the extraction is to be done in reverse order.
1956
1957    If TARGET is nonzero, attempts to store the value there
1958    and return TARGET, but this is not guaranteed.
1959    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.

        When OP0 is a MEM, an access mode is first requested from
        get_best_mode (called with word_mode and the MEM's alignment and
        volatility) and OP0 and BITNUM are narrowed to it; if no mode is
        found the work is handed to extract_split_bit_field.  In all other
        cases the extraction itself is done by extract_fixed_bit_field_1.  */
1960
1961 static rtx
1962 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1963                          unsigned HOST_WIDE_INT bitsize,
1964                          unsigned HOST_WIDE_INT bitnum, rtx target,
1965                          int unsignedp, bool reverse)
1966 {
1967   if (MEM_P (op0))
1968     {
1969       machine_mode mode
1970         = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1971                          MEM_VOLATILE_P (op0));
1972
1973       if (mode == VOIDmode)
1974         /* The only way this should occur is if the field spans word
1975            boundaries.  */
1976         return extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1977                                         reverse);
1978
           /* BITNUM is passed by address so that it is updated together
              with OP0.  */
1979       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1980     }
1981
1982   return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1983                                     target, unsignedp, reverse);
1984 }
1985
1986 /* Helper function for extract_fixed_bit_field, extracts
1987    the bit field always using the MODE of OP0.

        The arguments are as for extract_fixed_bit_field.  OP0 must have a
        scalar integer mode (this is asserted).  An unsigned field is
        shifted right until it starts at the lsb, converted to TMODE and
        then masked.  A signed field is shifted left until its msb is the
        msb of the narrowest integer mode that contains it, and then
        shifted right arithmetically.  */
1988
1989 static rtx
1990 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1991                            unsigned HOST_WIDE_INT bitsize,
1992                            unsigned HOST_WIDE_INT bitnum, rtx target,
1993                            int unsignedp, bool reverse)
1994 {
1995   machine_mode mode = GET_MODE (op0);
1996   gcc_assert (SCALAR_INT_MODE_P (mode));
1997
1998   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1999      for invalid input, such as extract equivalent of f5 from
2000      gcc.dg/pr48335-2.c.  */
2001
2002   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2003     /* BITNUM is the distance between our msb and that of OP0.
2004        Convert it to the distance from the lsb.  */
2005     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2006
2007   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2008      We have reduced the big-endian case to the little-endian case.  */
2009   if (reverse)
2010     op0 = flip_storage_order (mode, op0);
2011
2012   if (unsignedp)
2013     {
2014       if (bitnum)
2015         {
2016           /* If the field does not already start at the lsb,
2017              shift it so it does.  */
2018           /* Maybe propagate the target for the shift.  */
2019           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
               /* The shift is done in MODE, so TARGET is only offered to it
                  when MODE is TMODE.  */
2020           if (tmode != mode)
2021             subtarget = 0;
2022           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2023         }
2024       /* Convert the value to the desired mode.  */
2025       if (mode != tmode)
2026         op0 = convert_to_mode (tmode, op0, 1);
2027
2028       /* Unless the msb of the field used to be the msb when we shifted,
2029          mask out the upper bits.  */
2030
2031       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2032         return expand_binop (GET_MODE (op0), and_optab, op0,
2033                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
2034                              target, 1, OPTAB_LIB_WIDEN);
2035       return op0;
2036     }
2037
2038   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2039      then arithmetic-shift its lsb to the lsb of the word.  */
2040   op0 = force_reg (mode, op0);
2041
2042   /* Find the narrowest integer mode that contains the field.  */
       /* From here on MODE is that mode rather than the original mode of
          OP0; if the loop finds none, MODE ends up as VOIDmode.  */
2043
2044   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
2045        mode = GET_MODE_WIDER_MODE (mode))
2046     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
2047       {
2048         op0 = convert_to_mode (mode, op0, 0);
2049         break;
2050       }
2051
       /* The shifts below are done in MODE, so TARGET is only usable when
          MODE is TMODE.  */
2052   if (mode != tmode)
2053     target = 0;
2054
       /* Left shift, needed only if the field's msb is not already the msb
          of MODE.  */
2055   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2056     {
2057       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2058       /* Maybe propagate the target for the shift.  */
2059       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2060       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2061     }
2062
       /* The final right shift passes 0 as its last argument, unlike the
          shifts above.  */
2063   return expand_shift (RSHIFT_EXPR, mode, op0,
2064                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2065 }
2066
2067 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2068    VALUE << BITPOS.  The shift is computed with wi::lshift and the result
        is turned into an rtx by immed_wide_int_const, which is given MODE.  */
2069
2070 static rtx
2071 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2072               int bitpos)
2073 {
2074   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2075 }
2076 \f
2077 /* Extract a bit field that is split across two words
2078    and return an RTX for the result.
2079
2080    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2081    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2082    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2083
2084    If REVERSE is true, the extraction is to be done in reverse order.

        The result is computed in word_mode.  The field is processed in
        pieces that each stay within one UNIT-sized chunk of OP0: every
        piece is extracted as an unsigned field by extract_fixed_bit_field,
        shifted into position and IORed into the result.  For a signed
        field the combined value is then sign-extended with two shifts.  */
2085
2086 static rtx
2087 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2088                          unsigned HOST_WIDE_INT bitpos, int unsignedp,
2089                          bool reverse)
2090 {
2091   unsigned int unit;
2092   unsigned int bitsdone = 0;
2093   rtx result = NULL_RTX;
2094   int first = 1;
2095
2096   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2097      much at a time.  */
2098   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2099     unit = BITS_PER_WORD;
2100   else
2101     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2102
2103   while (bitsdone < bitsize)
2104     {
2105       unsigned HOST_WIDE_INT thissize;
2106       rtx part, word;
2107       unsigned HOST_WIDE_INT thispos;
2108       unsigned HOST_WIDE_INT offset;
2109
           /* OFFSET counts whole UNITs from the start of OP0; THISPOS is the
              bit position of this piece within that UNIT.  */
2110       offset = (bitpos + bitsdone) / unit;
2111       thispos = (bitpos + bitsdone) % unit;
2112
2113       /* THISSIZE must not overrun a word boundary.  Otherwise,
2114          extract_fixed_bit_field will call us again, and we will mutually
2115          recurse forever.  */
2116       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2117       thissize = MIN (thissize, unit - thispos);
2118
2119       /* If OP0 is a register, then handle OFFSET here.
2120
2121          When handling multiword bitfields, extract_bit_field may pass
2122          down a word_mode SUBREG of a larger REG for a bitfield that actually
2123          crosses a word boundary.  Thus, for a SUBREG, we must find
2124          the current word starting from the base register.  */
2125       if (GET_CODE (op0) == SUBREG)
2126         {
2127           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2128           word = operand_subword_force (SUBREG_REG (op0), word_offset,
2129                                         GET_MODE (SUBREG_REG (op0)));
2130           offset = 0;
2131         }
2132       else if (REG_P (op0))
2133         {
2134           word = operand_subword_force (op0, offset, GET_MODE (op0));
2135           offset = 0;
2136         }
2137       else
2138         word = op0;
2139
2140       /* Extract the parts in bit-counting order,
2141          whose meaning is determined by BYTES_PER_UNIT.
2142          OFFSET is in UNITs, and UNIT is in bits.  */
           /* NOTE(review): the code here tests BYTES_BIG_ENDIAN, not
              BYTES_PER_UNIT; presumably the comment above means the former
              -- confirm.  The piece is always extracted as unsigned, into a
              fresh location (TARGET argument 0).  */
2143       part = extract_fixed_bit_field (word_mode, word, thissize,
2144                                       offset * unit + thispos, 0, 1, reverse);
2145       bitsdone += thissize;
2146
2147       /* Shift this part into place for the result.  */
2148       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2149         {
               /* Earlier pieces are more significant: shift this piece left
                  by the number of bits still to come.  */
2150           if (bitsize != bitsdone)
2151             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2152                                  bitsize - bitsdone, 0, 1);
2153         }
2154       else
2155         {
               /* Earlier pieces are less significant: shift this piece left
                  by the number of bits already extracted before it.  */
2156           if (bitsdone != thissize)
2157             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2158                                  bitsdone - thissize, 0, 1);
2159         }
2160
2161       if (first)
2162         result = part;
2163       else
2164         /* Combine the parts with bitwise or.  This works
2165            because we extracted each part as an unsigned bit field.  */
2166         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2167                                OPTAB_LIB_WIDEN);
2168
2169       first = 0;
2170     }
2171
2172   /* Unsigned bit field: we are done.  */
2173   if (unsignedp)
2174     return result;
2175   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2176   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2177                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2178   return expand_shift (RSHIFT_EXPR, word_mode, result,
2179                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2180 }
2181 \f
2182 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2183    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2184    MODE, fill the upper bits with zeros.  Fail if the layout of either
2185    mode is unknown (as for CC modes) or if the extraction would involve
2186    unprofitable mode punning.  Return the value on success, otherwise
2187    return null.
2188
2189    This is different from gen_lowpart* in these respects:
2190
2191      - the returned value must always be considered an rvalue
2192
2193      - when MODE is wider than SRC_MODE, the extraction involves
2194        a zero extension
2195
2196      - when MODE is smaller than SRC_MODE, the extraction involves
2197        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2198
2199    In other words, this routine performs a computation, whereas the
2200    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2201    operations.  */
2202
2203 rtx
2204 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2205 {
2206   machine_mode int_mode, src_int_mode;
2207
       /* Nothing to do when the modes already agree.  */
2208   if (mode == src_mode)
2209     return src;
2210
2211   if (CONSTANT_P (src))
2212     {
2213       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2214          fails, it will happily create (subreg (symbol_ref)) or similar
2215          invalid SUBREGs.  */
2216       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2217       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2218       if (ret)
2219         return ret;
2220
           /* The constant could not be simplified.  Give up if it has no
              mode of its own or the SUBREG would be invalid; otherwise load
              it into a register and take the SUBREG of that.  */
2221       if (GET_MODE (src) == VOIDmode
2222           || !validate_subreg (mode, src_mode, src, byte))
2223         return NULL_RTX;
2224
2225       src = force_reg (GET_MODE (src), src);
2226       return gen_rtx_SUBREG (mode, src, byte);
2227     }
2228
       /* The layout of CC modes is unknown, so fail.  */
2229   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2230     return NULL_RTX;
2231
       /* Tieable modes of the same size: try a plain lowpart first.  */
2232   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2233       && MODES_TIEABLE_P (mode, src_mode))
2234     {
2235       rtx x = gen_lowpart_common (mode, src);
2236       if (x)
2237         return x;
2238     }
2239
       /* Otherwise go through integer modes.  Fail if either mode has no
          integer counterpart or is not tieable with it.  */
2240   src_int_mode = int_mode_for_mode (src_mode);
2241   int_mode = int_mode_for_mode (mode);
2242   if (src_int_mode == BLKmode || int_mode == BLKmode)
2243     return NULL_RTX;
2244
2245   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2246     return NULL_RTX;
2247   if (!MODES_TIEABLE_P (int_mode, mode))
2248     return NULL_RTX;
2249
       /* Pun SRC to SRC_INT_MODE, convert that to INT_MODE, and pun the
          result to MODE.  convert_modes is given `true' as its last
          argument; presumably that is what yields the zero extension
          documented above.  */
2250   src = gen_lowpart (src_int_mode, src);
2251   src = convert_modes (int_mode, src_int_mode, src, true);
2252   src = gen_lowpart (mode, src);
2253   return src;
2254 }
2255 \f
2256 /* Emit insns that add INC to TARGET, leaving the sum in TARGET.  */
2257
2258 void
2259 expand_inc (rtx target, rtx inc)
2260 {
2261   machine_mode target_mode = GET_MODE (target);
2262   rtx sum = expand_binop (target_mode, add_optab, target, inc, target, 0,
2263                           OPTAB_LIB_WIDEN);
       /* expand_binop may return an rtx other than TARGET; copy it back.  */
2264   if (sum != target)
2265     emit_move_insn (target, sum);
2266 }
2267
2268 /* Emit insns that subtract DEC from TARGET, leaving the result in TARGET.  */
2269
2270 void
2271 expand_dec (rtx target, rtx dec)
2272 {
2273   machine_mode target_mode = GET_MODE (target);
2274   rtx difference = expand_binop (target_mode, sub_optab, target, dec, target,
2275                                  0, OPTAB_LIB_WIDEN);
       /* expand_binop may return an rtx other than TARGET; copy it back.  */
2276   if (difference != target)
2277     emit_move_insn (target, difference);
2278 }
2279 \f
2280 /* Output a shift or rotate instruction for expression code CODE in
2281    mode MODE, with SHIFTED being the rtx for the value to shift and
2282    AMOUNT the rtx for the amount to shift by (scalar or vector).
2283    Store the result in the rtx TARGET, if that is convenient.
2284    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2285    Return the rtx for where the value is.  */
2286
2287 static rtx
2288 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2289                 rtx amount, rtx target, int unsignedp)
2290 {
2291   rtx op1, temp = 0;
2292   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2293   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2294   optab lshift_optab = ashl_optab;
2295   optab rshift_arith_optab = ashr_optab;
2296   optab rshift_uns_optab = lshr_optab;
2297   optab lrotate_optab = rotl_optab;
2298   optab rrotate_optab = rotr_optab;
2299   machine_mode op1_mode;
2300   machine_mode scalar_mode = mode;
2301   int attempt;
2302   bool speed = optimize_insn_for_speed_p ();
2303
2304   if (VECTOR_MODE_P (mode))
2305     scalar_mode = GET_MODE_INNER (mode);
2306   op1 = amount;
2307   op1_mode = GET_MODE (op1);
2308
2309   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2310      shift amount is a vector, use the vector/vector shift patterns.  */
2311   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2312     {
2313       lshift_optab = vashl_optab;
2314       rshift_arith_optab = vashr_optab;
2315       rshift_uns_optab = vlshr_optab;
2316       lrotate_optab = vrotl_optab;
2317       rrotate_optab = vrotr_optab;
2318     }
2319
2320   /* Previously detected shift-counts computed by NEGATE_EXPR
2321      and shifted in the other direction; but that does not work
2322      on all machines.  */
2323
2324   if (SHIFT_COUNT_TRUNCATED)
2325     {
2326       if (CONST_INT_P (op1)
2327           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2328               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2329         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2330                        % GET_MODE_BITSIZE (scalar_mode));
2331       else if (GET_CODE (op1) == SUBREG
2332                && subreg_lowpart_p (op1)
2333                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2334                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2335         op1 = SUBREG_REG (op1);
2336     }
2337
2338   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2339      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2340      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2341      amount instead.  */
2342   if (rotate
2343       && CONST_INT_P (op1)
2344       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2345                    GET_MODE_BITSIZE (scalar_mode) - 1))
2346     {
2347       op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2348       left = !left;
2349       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2350     }
2351
2352   /* Rotating 16-bit elements by 8 bits is a byte swap, so use bswap in MODE
2353      itself (MODE may be a vector of 16-bit elements, not just HImode).  This
2354      does not hold for wider elements: rotating 0x01020304 by 16 bits gives
2355      0x03040102, whereas bswapsi gives 0x04030201.  */
2356   if (rotate
2357       && CONST_INT_P (op1)
2358       && INTVAL (op1) == BITS_PER_UNIT
2359       && GET_MODE_SIZE (scalar_mode) == 2
2360       && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2361     return expand_unop (mode, bswap_optab, shifted, NULL_RTX,
2362                         unsignedp);
2363
2364   if (op1 == const0_rtx)
2365     return shifted;
2366
2367   /* Check whether it's cheaper to implement a left shift by a constant
2368      bit count by a sequence of additions.  */
2369   if (code == LSHIFT_EXPR
2370       && CONST_INT_P (op1)
2371       && INTVAL (op1) > 0
2372       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2373       && INTVAL (op1) < MAX_BITS_PER_WORD
2374       && (shift_cost (speed, mode, INTVAL (op1))
2375           > INTVAL (op1) * add_cost (speed, mode))
2376       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2377     {
2378       int i;
2379       for (i = 0; i < INTVAL (op1); i++)
2380         {
2381           temp = force_reg (mode, shifted);
2382           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2383                                   unsignedp, OPTAB_LIB_WIDEN);
2384         }
2385       return shifted;
2386     }
2387
2388   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2389     {
2390       enum optab_methods methods;
2391
2392       if (attempt == 0)
2393         methods = OPTAB_DIRECT;
2394       else if (attempt == 1)
2395         methods = OPTAB_WIDEN;
2396       else
2397         methods = OPTAB_LIB_WIDEN;
2398
2399       if (rotate)
2400         {
2401           /* Widening does not work for rotation.  */
2402           if (methods == OPTAB_WIDEN)
2403             continue;
2404           else if (methods == OPTAB_LIB_WIDEN)
2405             {
2406               /* If we have been unable to open-code this by a rotation,
2407                  do it as the IOR of two shifts.  I.e., to rotate A
2408                  by N bits, compute
2409                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2410                  where C is the bitsize of A.
2411
2412                  It is theoretically possible that the target machine might
2413                  not be able to perform either shift and hence we would
2414                  be making two libcalls rather than just the one for the
2415                  shift (similarly if IOR could not be done).  We will allow
2416                  this extremely unlikely lossage to avoid complicating the
2417                  code below.  */
2418
2419               rtx subtarget = target == shifted ? 0 : target;
2420               rtx new_amount, other_amount;
2421               rtx temp1;
2422
2423               new_amount = op1;
2424               if (op1 == const0_rtx)
2425                 return shifted;
2426               else if (CONST_INT_P (op1))
2427                 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2428                                         - INTVAL (op1));
2429               else
2430                 {
2431                   other_amount
2432                     = simplify_gen_unary (NEG, GET_MODE (op1),
2433                                           op1, GET_MODE (op1));
2434                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2435                   other_amount
2436                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2437                                            gen_int_mode (mask, GET_MODE (op1)));
2438                 }
2439
2440               shifted = force_reg (mode, shifted);
2441
2442               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2443                                      mode, shifted, new_amount, 0, 1);
2444               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2445                                       mode, shifted, other_amount,
2446                                       subtarget, 1);
2447               return expand_binop (mode, ior_optab, temp, temp1, target,
2448                                    unsignedp, methods);
2449             }
2450
2451           temp = expand_binop (mode,
2452                                left ? lrotate_optab : rrotate_optab,
2453                                shifted, op1, target, unsignedp, methods);
2454         }
2455       else if (unsignedp)
2456         temp = expand_binop (mode,
2457                              left ? lshift_optab : rshift_uns_optab,
2458                              shifted, op1, target, unsignedp, methods);
2459
2460       /* Do arithmetic shifts.
2461          Also, if we are going to widen the operand, we can just as well
2462          use an arithmetic right-shift instead of a logical one.  */
2463       if (temp == 0 && ! rotate
2464           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2465         {
2466           enum optab_methods methods1 = methods;
2467
2468           /* If trying to widen a log shift to an arithmetic shift,
2469              don't accept an arithmetic shift of the same size.  */
2470           if (unsignedp)
2471             methods1 = OPTAB_MUST_WIDEN;
2472
2473           /* Arithmetic shift */
2474
2475           temp = expand_binop (mode,
2476                                left ? lshift_optab : rshift_arith_optab,
2477                                shifted, op1, target, unsignedp, methods1);
2478         }
2479
2480       /* We used to try extzv here for logical right shifts, but that was
2481          only useful for one machine, the VAX, and caused poor code
2482          generation there for lshrdi3, so the code was deleted and a
2483          define_expand for lshrsi3 was added to vax.md.  */
2484     }
2485
2486   gcc_assert (temp);
2487   return temp;
2488 }
2489
2490 /* Shift or rotate SHIFTED, a value of mode MODE, by the integer
2491    AMOUNT; CODE is the tree code selecting the operation.
2492    TARGET is where the result is stored if that is convenient.
2493    A nonzero UNSIGNEDP requests a logical shift, zero an arithmetic
2494    one.  The return value is the rtx holding the result.  AMOUNT is
2495    converted to an rtx with GEN_INT and handed to expand_shift_1.  */
2496
2497 rtx
2498 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2499               int amount, rtx target, int unsignedp)
2500 {
2501   rtx count = GEN_INT (amount);
2502   return expand_shift_1 (code, mode, shifted, count, target, unsignedp);
2503 }
2504
2505 /* Shift or rotate SHIFTED, a value of mode MODE, by the tree
2506    expression AMOUNT; CODE is the tree code selecting the operation.
2507    TARGET is where the result is stored if that is convenient.
2508    A nonzero UNSIGNEDP requests a logical shift, zero an arithmetic
2509    one.  The return value is the rtx holding the result.  AMOUNT is
2510    expanded with expand_normal and handed to expand_shift_1.  */
2511
2512 rtx
2513 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2514                        tree amount, rtx target, int unsignedp)
2515 {
2516   rtx count = expand_normal (amount);
2517   return expand_shift_1 (code, mode, shifted, count, target, unsignedp);
2518 }
2519
2520 \f
2521 /* The kind of fixup needed after a constant multiplication has been
2522    synthesized: basic_variant means no fixup is needed, negate_variant
2523    means that the result should be negated, and add_variant means that
2524    the multiplicand should be added to the result.  */
2525 enum mult_variant {basic_variant, negate_variant, add_variant};
2526
2527 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2528                         const struct mult_cost *, machine_mode mode);
2529 static bool choose_mult_variant (machine_mode, HOST_WIDE_INT,
2530                                  struct algorithm *, enum mult_variant *, int);
2531 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2532                               const struct algorithm *, enum mult_variant);
2533 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2534 static rtx extract_high_half (machine_mode, rtx);
2535 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2536 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2537                                        int, int);
2538 /* Compute the best algorithm for multiplying by T and store it in
2539    *ALG_OUT.  The algorithm must cost less than COST_LIMIT.
2540    If ALG_OUT->cost >= COST_LIMIT, no algorithm was found and all
2541    other fields of *ALG_OUT are undefined.
2542    MODE is the machine mode of the multiplication.  */
2543
2544 static void
2545 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2546             const struct mult_cost *cost_limit, machine_mode mode)
2547 {
2548   int m;
2549   struct algorithm *alg_in, *best_alg;
2550   struct mult_cost best_cost;
2551   struct mult_cost new_limit;
2552   int op_cost, op_latency;
2553   unsigned HOST_WIDE_INT orig_t = t;
2554   unsigned HOST_WIDE_INT q;
2555   int maxm, hash_index;
2556   bool cache_hit = false;
2557   enum alg_code cache_alg = alg_zero;
2558   bool speed = optimize_insn_for_speed_p ();
2559   machine_mode imode;
2560   struct alg_hash_entry *entry_ptr;
2561
2562   /* Indicate that no algorithm is yet found.  If no algorithm
2563      is found, this value will be returned and indicate failure.  */
2564   alg_out->cost.cost = cost_limit->cost + 1;
2565   alg_out->cost.latency = cost_limit->latency + 1;
2566
2567   if (cost_limit->cost < 0
2568       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2569     return;
2570
2571   /* Be prepared for vector modes.  */
2572   imode = GET_MODE_INNER (mode);
2573
2574   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2575
2576   /* Restrict the bits of "t" to the multiplication's mode.  */
2577   t &= GET_MODE_MASK (imode);
2578
2579   /* t == 1 can be done in zero cost.  */
2580   if (t == 1)
2581     {
2582       alg_out->ops = 1;
2583       alg_out->cost.cost = 0;
2584       alg_out->cost.latency = 0;
2585       alg_out->op[0] = alg_m;
2586       return;
2587     }
2588
2589   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2590      fail now.  */
2591   if (t == 0)
2592     {
2593       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2594         return;
2595       else
2596         {
2597           alg_out->ops = 1;
2598           alg_out->cost.cost = zero_cost (speed);
2599           alg_out->cost.latency = zero_cost (speed);
2600           alg_out->op[0] = alg_zero;
2601           return;
2602         }
2603     }
2604
2605   /* We'll be needing a couple extra algorithm structures now.  */
2606
2607   alg_in = XALLOCA (struct algorithm);
2608   best_alg = XALLOCA (struct algorithm);
2609   best_cost = *cost_limit;
2610
2611   /* Compute the hash index.  */
2612   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2613
2614   /* See if we already know what to do for T.  The entry must have
2615      been recorded for the same T, MODE and speed setting.  */
2616   entry_ptr = alg_hash_entry_ptr (hash_index);
2617   if (entry_ptr->t == t
2618       && entry_ptr->mode == mode
2619       && entry_ptr->speed == speed
2620       && entry_ptr->alg != alg_unknown)
2621     {
2622       cache_alg = entry_ptr->alg;
2623
2624       if (cache_alg == alg_impossible)
2625         {
2626           /* The cache tells us that it's impossible to synthesize
2627              multiplication by T within entry_ptr->cost.  */
2628           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2629             /* COST_LIMIT is at least as restrictive as the one
2630                recorded in the hash table, in which case we have no
2631                hope of synthesizing a multiplication.  Just
2632                return.  */
2633             return;
2634
2635           /* If we get here, COST_LIMIT is less restrictive than the
2636              one recorded in the hash table, so we may be able to
2637              synthesize a multiplication.  Proceed as if we didn't
2638              have the cache entry.  */
2639         }
2640       else
2641         {
2642           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2643             /* The cached algorithm shows that this multiplication
2644                requires more cost than COST_LIMIT.  Just return.  This
2645                way, we don't clobber this cache entry with
2646                alg_impossible but retain useful information.  */
2647             return;
2648
2649           cache_hit = true;
2650
2651           switch (cache_alg)
2652             {
2653             case alg_shift:
2654               goto do_alg_shift;
2655
2656             case alg_add_t_m2:
2657             case alg_sub_t_m2:
2658               goto do_alg_addsub_t_m2;
2659
2660             case alg_add_factor:
2661             case alg_sub_factor:
2662               goto do_alg_addsub_factor;
2663
2664             case alg_add_t2_m:
2665               goto do_alg_add_t2_m;
2666
2667             case alg_sub_t2_m:
2668               goto do_alg_sub_t2_m;
2669
2670             default:
2671               gcc_unreachable ();
2672             }
2673         }
2674     }
2675
2676   /* If we have a group of zero bits at the low-order part of T, try
2677      multiplying by the remaining bits and then doing a shift.  */
2678
2679   if ((t & 1) == 0)
2680     {
2681     do_alg_shift:
2682       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2683       if (m < maxm)
2684         {
2685           q = t >> m;
2686           /* The function expand_shift will choose between a shift and
2687              a sequence of additions, so the observed cost is given as
2688              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2689           op_cost = m * add_cost (speed, mode);
2690           if (shift_cost (speed, mode, m) < op_cost)
2691             op_cost = shift_cost (speed, mode, m);
2692           new_limit.cost = best_cost.cost - op_cost;
2693           new_limit.latency = best_cost.latency - op_cost;
2694           synth_mult (alg_in, q, &new_limit, mode);
2695
2696           alg_in->cost.cost += op_cost;
2697           alg_in->cost.latency += op_cost;
2698           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2699             {
2700               best_cost = alg_in->cost;
2701               std::swap (alg_in, best_alg);
2702               best_alg->log[best_alg->ops] = m;
2703               best_alg->op[best_alg->ops] = alg_shift;
2704             }
2705
2706           /* See if treating ORIG_T as a signed number yields a better
2707              sequence.  Try this sequence only for a negative ORIG_T
2708              as it would be useless for a non-negative ORIG_T.  */
2709           if ((HOST_WIDE_INT) orig_t < 0)
2710             {
2711               /* Shift ORIG_T as follows because a right shift of a
2712                  negative-valued signed type is implementation
2713                  defined.  */
2714               q = ~(~orig_t >> m);
2715               /* The function expand_shift will choose between a shift
2716                  and a sequence of additions, so the observed cost is
2717                  given as MIN (m * add_cost(speed, mode),
2718                  shift_cost(speed, mode, m)).  */
2719               op_cost = m * add_cost (speed, mode);
2720               if (shift_cost (speed, mode, m) < op_cost)
2721                 op_cost = shift_cost (speed, mode, m);
2722               new_limit.cost = best_cost.cost - op_cost;
2723               new_limit.latency = best_cost.latency - op_cost;
2724               synth_mult (alg_in, q, &new_limit, mode);
2725
2726               alg_in->cost.cost += op_cost;
2727               alg_in->cost.latency += op_cost;
2728               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2729                 {
2730                   best_cost = alg_in->cost;
2731                   std::swap (alg_in, best_alg);
2732                   best_alg->log[best_alg->ops] = m;
2733                   best_alg->op[best_alg->ops] = alg_shift;
2734                 }
2735             }
2736         }
2737       if (cache_hit)
2738         goto done;
2739     }
2740
2741   /* If we have an odd number, add or subtract one.  */
2742   if ((t & 1) != 0)
2743     {
2744       unsigned HOST_WIDE_INT w;
2745
2746     do_alg_addsub_t_m2:
2747       for (w = 1; (w & t) != 0; w <<= 1)
2748         ;
2749       /* If T was -1, then W will be zero after the loop.  This is another
2750          case where T ends with ...111.  Handling this with (T + 1) and
2751          subtract 1 produces slightly better code and results in algorithm
2752          selection much faster than treating it like the ...0111 case
2753          below.  */
2754       if (w == 0
2755           || (w > 2
2756               /* Reject the case where t is 3.
2757                  Thus we prefer addition in that case.  */
2758               && t != 3))
2759         {
2760           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2761
2762           op_cost = add_cost (speed, mode);
2763           new_limit.cost = best_cost.cost - op_cost;
2764           new_limit.latency = best_cost.latency - op_cost;
2765           synth_mult (alg_in, t + 1, &new_limit, mode);
2766
2767           alg_in->cost.cost += op_cost;
2768           alg_in->cost.latency += op_cost;
2769           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2770             {
2771               best_cost = alg_in->cost;
2772               std::swap (alg_in, best_alg);
2773               best_alg->log[best_alg->ops] = 0;
2774               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2775             }
2776         }
2777       else
2778         {
2779           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2780
2781           op_cost = add_cost (speed, mode);
2782           new_limit.cost = best_cost.cost - op_cost;
2783           new_limit.latency = best_cost.latency - op_cost;
2784           synth_mult (alg_in, t - 1, &new_limit, mode);
2785
2786           alg_in->cost.cost += op_cost;
2787           alg_in->cost.latency += op_cost;
2788           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2789             {
2790               best_cost = alg_in->cost;
2791               std::swap (alg_in, best_alg);
2792               best_alg->log[best_alg->ops] = 0;
2793               best_alg->op[best_alg->ops] = alg_add_t_m2;
2794             }
2795         }
2796
2797       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2798          quickly with a - a * n for some appropriate constant n.  */
2799       m = exact_log2 (-orig_t + 1);
2800       if (m >= 0 && m < maxm)
2801         {
2802           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2803           /* If the target has a cheap shift-and-subtract insn use
2804              that in preference to a shift insn followed by a sub insn.
2805              Assume that the shift-and-sub is "atomic" with a latency
2806              equal to its cost, otherwise assume that on superscalar
2807              hardware the shift may be executed concurrently with the
2808              earlier steps in the algorithm.  */
2809           if (shiftsub1_cost (speed, mode, m) <= op_cost)
2810             {
2811               op_cost = shiftsub1_cost (speed, mode, m);
2812               op_latency = op_cost;
2813             }
2814           else
2815             op_latency = add_cost (speed, mode);
2816
2817           new_limit.cost = best_cost.cost - op_cost;
2818           new_limit.latency = best_cost.latency - op_latency;
2819           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2820                       &new_limit, mode);
2821
2822           alg_in->cost.cost += op_cost;
2823           alg_in->cost.latency += op_latency;
2824           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2825             {
2826               best_cost = alg_in->cost;
2827               std::swap (alg_in, best_alg);
2828               best_alg->log[best_alg->ops] = m;
2829               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2830             }
2831         }
2832
2833       if (cache_hit)
2834         goto done;
2835     }
2836
2837   /* Look for factors of t of the form
2838      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2839      If we find such a factor, we can multiply by t using an algorithm that
2840      multiplies by q, shift the result by m and add/subtract it to itself.
2841
2842      We search for large factors first and loop down, even if large factors
2843      are less probable than small; if we find a large factor we will find a
2844      good sequence quickly, and therefore be able to prune (by decreasing
2845      COST_LIMIT) the search.  */
2846
2847  do_alg_addsub_factor:
2848   for (m = floor_log2 (t - 1); m >= 2; m--)
2849     {
2850       unsigned HOST_WIDE_INT d;
2851
2852       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2853       if (t % d == 0 && t > d && m < maxm
2854           && (!cache_hit || cache_alg == alg_add_factor))
2855         {
2856           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2857           if (shiftadd_cost (speed, mode, m) <= op_cost)
2858             op_cost = shiftadd_cost (speed, mode, m);
2859
2860           op_latency = op_cost;
2861
2862
2863           new_limit.cost = best_cost.cost - op_cost;
2864           new_limit.latency = best_cost.latency - op_latency;
2865           synth_mult (alg_in, t / d, &new_limit, mode);
2866
2867           alg_in->cost.cost += op_cost;
2868           alg_in->cost.latency += op_latency;
2869           if (alg_in->cost.latency < op_cost)
2870             alg_in->cost.latency = op_cost;
2871           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2872             {
2873               best_cost = alg_in->cost;
2874               std::swap (alg_in, best_alg);
2875               best_alg->log[best_alg->ops] = m;
2876               best_alg->op[best_alg->ops] = alg_add_factor;
2877             }
2878           /* Other factors will have been taken care of in the recursion.  */
2879           break;
2880         }
2881
2882       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2883       if (t % d == 0 && t > d && m < maxm
2884           && (!cache_hit || cache_alg == alg_sub_factor))
2885         {
2886           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2887           if (shiftsub0_cost (speed, mode, m) <= op_cost)
2888             op_cost = shiftsub0_cost (speed, mode, m);
2889
2890           op_latency = op_cost;
2891
2892           new_limit.cost = best_cost.cost - op_cost;
2893           new_limit.latency = best_cost.latency - op_latency;
2894           synth_mult (alg_in, t / d, &new_limit, mode);
2895
2896           alg_in->cost.cost += op_cost;
2897           alg_in->cost.latency += op_latency;
2898           if (alg_in->cost.latency < op_cost)
2899             alg_in->cost.latency = op_cost;
2900           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2901             {
2902               best_cost = alg_in->cost;
2903               std::swap (alg_in, best_alg);
2904               best_alg->log[best_alg->ops] = m;
2905               best_alg->op[best_alg->ops] = alg_sub_factor;
2906             }
2907           break;
2908         }
2909     }
2910   if (cache_hit)
2911     goto done;
2912
2913   /* Try shift-and-add (load effective address) instructions,
2914      i.e. do a*3, a*5, a*9.  */
2915   if ((t & 1) != 0)
2916     {
2917     do_alg_add_t2_m:
2918       q = t - 1;
2919       q = q & -q;
2920       m = exact_log2 (q);
2921       if (m >= 0 && m < maxm)
2922         {
2923           op_cost = shiftadd_cost (speed, mode, m);
2924           new_limit.cost = best_cost.cost - op_cost;
2925           new_limit.latency = best_cost.latency - op_cost;
2926           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2927
2928           alg_in->cost.cost += op_cost;
2929           alg_in->cost.latency += op_cost;
2930           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2931             {
2932               best_cost = alg_in->cost;
2933               std::swap (alg_in, best_alg);
2934               best_alg->log[best_alg->ops] = m;
2935               best_alg->op[best_alg->ops] = alg_add_t2_m;
2936             }
2937         }
2938       if (cache_hit)
2939         goto done;
2940
2941     do_alg_sub_t2_m:
2942       q = t + 1;
2943       q = q & -q;
2944       m = exact_log2 (q);
2945       if (m >= 0 && m < maxm)
2946         {
2947           op_cost = shiftsub0_cost (speed, mode, m);
2948           new_limit.cost = best_cost.cost - op_cost;
2949           new_limit.latency = best_cost.latency - op_cost;
2950           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2951
2952           alg_in->cost.cost += op_cost;
2953           alg_in->cost.latency += op_cost;
2954           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2955             {
2956               best_cost = alg_in->cost;
2957               std::swap (alg_in, best_alg);
2958               best_alg->log[best_alg->ops] = m;
2959               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2960             }
2961         }
2962       if (cache_hit)
2963         goto done;
2964     }
2965
2966  done:
2967   /* If best_cost has not decreased, we have not found any algorithm.  */
2968   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2969     {
2970       /* We failed to find an algorithm.  Record alg_impossible for
2971          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2972          we are asked to find an algorithm for T within the same or
2973          lower COST_LIMIT, we can immediately return to the
2974          caller.  */
2975       entry_ptr->t = t;
2976       entry_ptr->mode = mode;
2977       entry_ptr->speed = speed;
2978       entry_ptr->alg = alg_impossible;
2979       entry_ptr->cost = *cost_limit;
2980       return;
2981     }
2982
2983   /* Cache the result.  */
2984   if (!cache_hit)
2985     {
2986       entry_ptr->t = t;
2987       entry_ptr->mode = mode;
2988       entry_ptr->speed = speed;
2989       entry_ptr->alg = best_alg->op[best_alg->ops];
2990       entry_ptr->cost.cost = best_cost.cost;
2991       entry_ptr->cost.latency = best_cost.latency;
2992     }
2993
2994   /* If we are getting a too long sequence for `struct algorithm'
2995      to record, make this search fail.  */
2996   if (best_alg->ops == MAX_BITS_PER_WORD)
2997     return;
2998
2999   /* Copy the algorithm from temporary space to the space at alg_out.
3000      We avoid using structure assignment because the majority of
3001      best_alg is normally undefined, and this is a critical function.  */
3002   alg_out->ops = best_alg->ops + 1;
3003   alg_out->cost = best_cost;
3004   memcpy (alg_out->op, best_alg->op,
3005           alg_out->ops * sizeof *alg_out->op);
3006   memcpy (alg_out->log, best_alg->log,
3007           alg_out->ops * sizeof *alg_out->log);
3008 }
3009 \f
3010 /* Choose the cheapest shift/add synthesis of a MODE multiplication by
3011    the constant VAL.  synth_mult is asked for three candidates:
3012
3013        - VAL itself, needing no fixup (basic_variant)
3014        - -VAL, with the product negated afterwards (negate_variant)
3015        - VAL - 1, with the multiplicand added afterwards (add_variant).
3016
3017    *ALG receives the cheapest candidate and *VARIANT its fixup.  Return
3018    true if that candidate costs less than MULT_COST.  */
3019
3020 static bool
3021 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3022                      struct algorithm *alg, enum mult_variant *variant,
3023                      int mult_cost)
3024 {
3025   bool speed = optimize_insn_for_speed_p ();
3026   struct mult_cost limit;
3027   struct algorithm alg2;
3028   bool found;
3029   int op_cost;
3030
3031   /* A negative budget can never be met.  */
3032   if (mult_cost < 0)
3033     return false;
3034
3035   /* Cap MULT_COST at a reasonable upper bound: any constant
3036      multiplication can be done with fewer than 2 * bits additions.  */
3037   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3038   if (op_cost < mult_cost)
3039     mult_cost = op_cost;
3040
3041   /* First candidate: VAL itself.  */
3042   *variant = basic_variant;
3043   limit.cost = mult_cost;
3044   limit.latency = mult_cost;
3045   synth_mult (alg, val, &limit, mode);
3046
3047   /* Second candidate: -VAL followed by a negation.  This works only
3048      if the inverted value actually fits in an `unsigned int'.  */
3049   if (GET_MODE_UNIT_BITSIZE (mode) <= HOST_BITS_PER_INT)
3050     {
3051       op_cost = neg_cost (speed, mode);
3052
3053       /* Budget for the sequence proper: the best cost so far if it
3054          already beats MULT_COST, else MULT_COST, less the negation.  */
3055       found = MULT_COST_LESS (&alg->cost, mult_cost);
3056       limit.cost = (found ? alg->cost.cost : mult_cost) - op_cost;
3057       limit.latency = (found ? alg->cost.latency : mult_cost) - op_cost;
3058
3059       synth_mult (&alg2, -val, &limit, mode);
3060       alg2.cost.cost += op_cost;
3061       alg2.cost.latency += op_cost;
3062       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3063         {
3064           *alg = alg2;
3065           *variant = negate_variant;
3066         }
3067     }
3068
3069   /* Third candidate: VAL - 1 followed by an addition.  This proves
3070      very useful for division-by-constant.  */
3071   op_cost = add_cost (speed, mode);
3072   /* Same budgeting as for the negated candidate.  */
3073   found = MULT_COST_LESS (&alg->cost, mult_cost);
3074   limit.cost = (found ? alg->cost.cost : mult_cost) - op_cost;
3075   limit.latency = (found ? alg->cost.latency : mult_cost) - op_cost;
3076
3077   synth_mult (&alg2, val - 1, &limit, mode);
3078   alg2.cost.cost += op_cost;
3079   alg2.cost.latency += op_cost;
3080   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3081     {
3082       *alg = alg2;
3083       *variant = add_variant;
3084     }
3085
3086   /* Succeed only if the winner beats the (possibly capped) MULT_COST.  */
3087   found = MULT_COST_LESS (&alg->cost, mult_cost);
3088   return found;
3089 }
3090
3091 /* A subroutine of expand_mult, used for constant multiplications.
3092    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3093    convenient.  Use the shift/add sequence described by ALG and apply
3094    the final fixup specified by VARIANT.  */
3095
3096 static rtx
3097 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3098                    rtx target, const struct algorithm *alg,
3099                    enum mult_variant variant)
3100 {
3101   HOST_WIDE_INT val_so_far;
3102   rtx_insn *insn;
3103   rtx accum, tem;
3104   int opno;
3105   machine_mode nmode;
3106
3107   /* Avoid referencing memory over and over and invalid sharing
3108      on SUBREGs.  */
3109   op0 = force_reg (mode, op0);
3110
3111   /* ACCUM starts out either as OP0 or as a zero, depending on
3112      the first operation.  */
3113
3114   if (alg->op[0] == alg_zero)
3115     {
3116       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3117       val_so_far = 0;
3118     }
3119   else if (alg->op[0] == alg_m)
3120     {
3121       accum = copy_to_mode_reg (mode, op0);
3122       val_so_far = 1;
3123     }
3124   else
3125     gcc_unreachable ();
3126
3127   for (opno = 1; opno < alg->ops; opno++)
3128     {
3129       int log = alg->log[opno];
3130       rtx shift_subtarget = optimize ? 0 : accum;
3131       rtx add_target
3132         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3133            && !optimize)
3134           ? target : 0;
3135       rtx accum_target = optimize ? 0 : accum;
3136       rtx accum_inner;
3137
3138       switch (alg->op[opno])
3139         {
3140         case alg_shift:
3141           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3142           /* REG_EQUAL note will be attached to the following insn.  */
3143           emit_move_insn (accum, tem);
3144           val_so_far <<= log;
3145           break;
3146
3147         case alg_add_t_m2:
3148           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3149           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3150                                  add_target ? add_target : accum_target);
3151           val_so_far += (HOST_WIDE_INT) 1 << log;
3152           break;
3153
3154         case alg_sub_t_m2:
3155           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3156           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3157                                  add_target ? add_target : accum_target);
3158           val_so_far -= (HOST_WIDE_INT) 1 << log;
3159           break;
3160
3161         case alg_add_t2_m:
3162           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3163                                 log, shift_subtarget, 0);
3164           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3165                                  add_target ? add_target : accum_target);
3166           val_so_far = (val_so_far << log) + 1;
3167           break;
3168
3169         case alg_sub_t2_m:
3170           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3171                                 log, shift_subtarget, 0);
3172           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3173                                  add_target ? add_target : accum_target);
3174           val_so_far = (val_so_far << log) - 1;
3175           break;
3176
3177         case alg_add_factor:
3178           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3179           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3180                                  add_target ? add_target : accum_target);
3181           val_so_far += val_so_far << log;
3182           break;
3183
3184         case alg_sub_factor:
3185           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3186           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3187                                  (add_target
3188                                   ? add_target : (optimize ? 0 : tem)));
3189           val_so_far = (val_so_far << log) - val_so_far;
3190           break;
3191
3192         default:
3193           gcc_unreachable ();
3194         }
3195
3196       if (SCALAR_INT_MODE_P (mode))
3197         {
3198           /* Write a REG_EQUAL note on the last insn so that we can cse
3199              multiplication sequences.  Note that if ACCUM is a SUBREG,
3200              we've set the inner register and must properly indicate that.  */
3201           tem = op0, nmode = mode;
3202           accum_inner = accum;
3203           if (GET_CODE (accum) == SUBREG)
3204             {
3205               accum_inner = SUBREG_REG (accum);
3206               nmode = GET_MODE (accum_inner);
3207               tem = gen_lowpart (nmode, op0);
3208             }
3209
3210           insn = get_last_insn ();
3211           set_dst_reg_note (insn, REG_EQUAL,
3212                             gen_rtx_MULT (nmode, tem,
3213                                           gen_int_mode (val_so_far, nmode)),
3214                             accum_inner);
3215         }
3216     }
3217
3218   if (variant == negate_variant)
3219     {
3220       val_so_far = -val_so_far;
3221       accum = expand_unop (mode, neg_optab, accum, target, 0);
3222     }
3223   else if (variant == add_variant)
3224     {
3225       val_so_far = val_so_far + 1;
3226       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3227     }
3228
3229   /* Compare only the bits of val and val_so_far that are significant
3230      in the result mode, to avoid sign-/zero-extension confusion.  */
3231   nmode = GET_MODE_INNER (mode);
3232   val &= GET_MODE_MASK (nmode);
3233   val_so_far &= GET_MODE_MASK (nmode);
3234   gcc_assert (val == val_so_far);
3235
3236   return accum;
3237 }
3238
3239 /* Perform a multiplication and return an rtx for the result.
3240    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3241    TARGET is a suggestion for where to store the result (an rtx).
3242
3243    We check specially for a constant integer as OP1.
3244    If you want this check for OP0 as well, then before calling
3245    you should swap the two operands if OP0 would be constant.  */
3246
3247 rtx
3248 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3249              int unsignedp)
3250 {
3251   enum mult_variant variant;
3252   struct algorithm algorithm;
3253   rtx scalar_op1;
3254   int max_cost;
3255   bool speed = optimize_insn_for_speed_p ();
3256   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3257
3258   if (CONSTANT_P (op0))
3259     std::swap (op0, op1);
3260
3261   /* For vectors, there are several simplifications that can be made if
3262      all elements of the vector constant are identical.  */
3263   scalar_op1 = unwrap_const_vec_duplicate (op1);
3264
3265   if (INTEGRAL_MODE_P (mode))
3266     {
3267       rtx fake_reg;
3268       HOST_WIDE_INT coeff;
3269       bool is_neg;
3270       int mode_bitsize;
3271
3272       if (op1 == CONST0_RTX (mode))
3273         return op1;
3274       if (op1 == CONST1_RTX (mode))
3275         return op0;
3276       if (op1 == CONSTM1_RTX (mode))
3277         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3278                             op0, target, 0);
3279
3280       if (do_trapv)
3281         goto skip_synth;
3282
3283       /* If mode is integer vector mode, check if the backend supports
3284          vector lshift (by scalar or vector) at all.  If not, we can't use
3285          synthetized multiply.  */
3286       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3287           && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3288           && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3289         goto skip_synth;
3290
3291       /* These are the operations that are potentially turned into
3292          a sequence of shifts and additions.  */
3293       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3294
3295       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3296          less than or equal in size to `unsigned int' this doesn't matter.
3297          If the mode is larger than `unsigned int', then synth_mult works
3298          only if the constant value exactly fits in an `unsigned int' without
3299          any truncation.  This means that multiplying by negative values does
3300          not work; results are off by 2^32 on a 32 bit machine.  */
3301       if (CONST_INT_P (scalar_op1))
3302         {
3303           coeff = INTVAL (scalar_op1);
3304           is_neg = coeff < 0;
3305         }
3306 #if TARGET_SUPPORTS_WIDE_INT
3307       else if (CONST_WIDE_INT_P (scalar_op1))
3308 #else
3309       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3310 #endif
3311         {
3312           int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3313           /* Perfect power of 2 (other than 1, which is handled above).  */
3314           if (shift > 0)
3315             return expand_shift (LSHIFT_EXPR, mode, op0,
3316                                  shift, target, unsignedp);
3317           else
3318             goto skip_synth;
3319         }
3320       else
3321         goto skip_synth;
3322
3323       /* We used to test optimize here, on the grounds that it's better to
3324          produce a smaller program when -O is not used.  But this causes
3325          such a terrible slowdown sometimes that it seems better to always
3326          use synth_mult.  */
3327
3328       /* Special case powers of two.  */
3329       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3330           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3331         return expand_shift (LSHIFT_EXPR, mode, op0,
3332                              floor_log2 (coeff), target, unsignedp);
3333
3334       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3335
3336       /* Attempt to handle multiplication of DImode values by negative
3337          coefficients, by performing the multiplication by a positive
3338          multiplier and then inverting the result.  */
3339       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3340         {
3341           /* Its safe to use -coeff even for INT_MIN, as the
3342              result is interpreted as an unsigned coefficient.
3343              Exclude cost of op0 from max_cost to match the cost
3344              calculation of the synth_mult.  */
3345           coeff = -(unsigned HOST_WIDE_INT) coeff;
3346           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3347                                     mode, speed)
3348                       - neg_cost (speed, mode));
3349           if (max_cost <= 0)
3350             goto skip_synth;
3351
3352           /* Special case powers of two.  */
3353           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3354             {
3355               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3356                                        floor_log2 (coeff), target, unsignedp);
3357               return expand_unop (mode, neg_optab, temp, target, 0);
3358             }
3359
3360           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3361                                    max_cost))
3362             {
3363               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3364                                             &algorithm, variant);
3365               return expand_unop (mode, neg_optab, temp, target, 0);
3366             }
3367           goto skip_synth;
3368         }
3369
3370       /* Exclude cost of op0 from max_cost to match the cost
3371          calculation of the synth_mult.  */
3372       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3373       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3374         return expand_mult_const (mode, op0, coeff, target,
3375                                   &algorithm, variant);
3376     }
3377  skip_synth:
3378
3379   /* Expand x*2.0 as x+x.  */
3380   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3381       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3382     {
3383       op0 = force_reg (GET_MODE (op0), op0);
3384       return expand_binop (mode, add_optab, op0, op0,
3385                            target, unsignedp, OPTAB_LIB_WIDEN);
3386     }
3387
3388   /* This used to use umul_optab if unsigned, but for non-widening multiply
3389      there is no difference between signed and unsigned.  */
3390   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3391                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3392   gcc_assert (op0);
3393   return op0;
3394 }
3395
3396 /* Return a cost estimate for multiplying a register by the given
3397    COEFFicient in the given MODE and SPEED.  */
3398
3399 int
3400 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3401 {
3402   int max_cost;
3403   struct algorithm algorithm;
3404   enum mult_variant variant;
3405
3406   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3407   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3408                            mode, speed);
3409   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3410     return algorithm.cost.cost;
3411   else
3412     return max_cost;
3413 }
3414
3415 /* Perform a widening multiplication and return an rtx for the result.
3416    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3417    TARGET is a suggestion for where to store the result (an rtx).
3418    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3419    or smul_widen_optab.
3420
3421    We check specially for a constant integer as OP1, comparing the
3422    cost of a widening multiply against the cost of a sequence of shifts
3423    and adds.  */
3424
3425 rtx
3426 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3427                       int unsignedp, optab this_optab)
3428 {
3429   bool speed = optimize_insn_for_speed_p ();
3430   rtx cop1;
3431
3432   if (CONST_INT_P (op1)
3433       && GET_MODE (op0) != VOIDmode
3434       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3435                                 this_optab == umul_widen_optab))
3436       && CONST_INT_P (cop1)
3437       && (INTVAL (cop1) >= 0
3438           || HWI_COMPUTABLE_MODE_P (mode)))
3439     {
3440       HOST_WIDE_INT coeff = INTVAL (cop1);
3441       int max_cost;
3442       enum mult_variant variant;
3443       struct algorithm algorithm;
3444
3445       if (coeff == 0)
3446         return CONST0_RTX (mode);
3447
3448       /* Special case powers of two.  */
3449       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3450         {
3451           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3452           return expand_shift (LSHIFT_EXPR, mode, op0,
3453                                floor_log2 (coeff), target, unsignedp);
3454         }
3455
3456       /* Exclude cost of op0 from max_cost to match the cost
3457          calculation of the synth_mult.  */
3458       max_cost = mul_widen_cost (speed, mode);
3459       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3460                                max_cost))
3461         {
3462           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3463           return expand_mult_const (mode, op0, coeff, target,
3464                                     &algorithm, variant);
3465         }
3466     }
3467   return expand_binop (mode, this_optab, op0, op1, target,
3468                        unsignedp, OPTAB_LIB_WIDEN);
3469 }
3470 \f
3471 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3472    replace division by D, and put the least significant N bits of the result
3473    in *MULTIPLIER_PTR and return the most significant bit.
3474
3475    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3476    needed precision is in PRECISION (should be <= N).
3477
3478    PRECISION should be as small as possible so this function can choose
3479    multiplier more freely.
3480
3481    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3482    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3483
3484    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3485    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3486
3487 unsigned HOST_WIDE_INT
3488 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3489                    unsigned HOST_WIDE_INT *multiplier_ptr,
3490                    int *post_shift_ptr, int *lgup_ptr)
3491 {
3492   int lgup, post_shift;
3493   int pow, pow2;
3494
3495   /* lgup = ceil(log2(divisor)); */
3496   lgup = ceil_log2 (d);
3497
3498   gcc_assert (lgup <= n);
3499
3500   pow = n + lgup;
3501   pow2 = n + lgup - precision;
3502
3503   /* mlow = 2^(N + lgup)/d */
3504   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3505   wide_int mlow = wi::udiv_trunc (val, d);
3506
3507   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3508   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3509   wide_int mhigh = wi::udiv_trunc (val, d);
3510
3511   /* If precision == N, then mlow, mhigh exceed 2^N
3512      (but they do not exceed 2^(N+1)).  */
3513
3514   /* Reduce to lowest terms.  */
3515   for (post_shift = lgup; post_shift > 0; post_shift--)
3516     {
3517       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3518                                                        HOST_BITS_PER_WIDE_INT);
3519       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3520                                                        HOST_BITS_PER_WIDE_INT);
3521       if (ml_lo >= mh_lo)
3522         break;
3523
3524       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3525       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3526     }
3527
3528   *post_shift_ptr = post_shift;
3529   *lgup_ptr = lgup;
3530   if (n < HOST_BITS_PER_WIDE_INT)
3531     {
3532       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3533       *multiplier_ptr = mhigh.to_uhwi () & mask;
3534       return mhigh.to_uhwi () >= mask;
3535     }
3536   else
3537     {
3538       *multiplier_ptr = mhigh.to_uhwi ();
3539       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3540     }
3541 }
3542
3543 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3544    congruent to 1 (mod 2**N).  */
3545
3546 static unsigned HOST_WIDE_INT
3547 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3548 {
3549   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3550
3551   /* The algorithm notes that the choice y = x satisfies
3552      x*y == 1 mod 2^3, since x is assumed odd.
3553      Each iteration doubles the number of bits of significance in y.  */
3554
3555   unsigned HOST_WIDE_INT mask;
3556   unsigned HOST_WIDE_INT y = x;
3557   int nbit = 3;
3558
3559   mask = (n == HOST_BITS_PER_WIDE_INT
3560           ? ~(unsigned HOST_WIDE_INT) 0
3561           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3562
3563   while (nbit < n)
3564     {
3565       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3566       nbit *= 2;
3567     }
3568   return y;
3569 }
3570
3571 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3572    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3573    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3574    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3575    become signed.
3576
3577    The result is put in TARGET if that is convenient.
3578
3579    MODE is the mode of operation.  */
3580
3581 rtx
3582 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3583                              rtx op1, rtx target, int unsignedp)
3584 {
3585   rtx tem;
3586   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3587
3588   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3589                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3590   tem = expand_and (mode, tem, op1, NULL_RTX);
3591   adj_operand
3592     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3593                      adj_operand);
3594
3595   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3596                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3597   tem = expand_and (mode, tem, op0, NULL_RTX);
3598   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3599                           target);
3600
3601   return target;
3602 }
3603
3604 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3605
3606 static rtx
3607 extract_high_half (machine_mode mode, rtx op)
3608 {
3609   machine_mode wider_mode;
3610
3611   if (mode == word_mode)
3612     return gen_highpart (mode, op);
3613
3614   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3615
3616   wider_mode = GET_MODE_WIDER_MODE (mode);
3617   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3618                      GET_MODE_BITSIZE (mode), 0, 1);
3619   return convert_modes (mode, wider_mode, op, 0);
3620 }
3621
3622 /* Like expmed_mult_highpart, but only consider using a multiplication
3623    optab.  OP1 is an rtx for the constant operand.  */
3624
3625 static rtx
3626 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3627                             rtx target, int unsignedp, int max_cost)
3628 {
3629   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3630   machine_mode wider_mode;
3631   optab moptab;
3632   rtx tem;
3633   int size;
3634   bool speed = optimize_insn_for_speed_p ();
3635
3636   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3637
3638   wider_mode = GET_MODE_WIDER_MODE (mode);
3639   size = GET_MODE_BITSIZE (mode);
3640
3641   /* Firstly, try using a multiplication insn that only generates the needed
3642      high part of the product, and in the sign flavor of unsignedp.  */
3643   if (mul_highpart_cost (speed, mode) < max_cost)
3644     {
3645       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3646       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3647                           unsignedp, OPTAB_DIRECT);
3648       if (tem)
3649         return tem;
3650     }
3651
3652   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3653      Need to adjust the result after the multiplication.  */
3654   if (size - 1 < BITS_PER_WORD
3655       && (mul_highpart_cost (speed, mode)
3656           + 2 * shift_cost (speed, mode, size-1)
3657           + 4 * add_cost (speed, mode) < max_cost))
3658     {
3659       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3660       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3661                           unsignedp, OPTAB_DIRECT);
3662       if (tem)
3663         /* We used the wrong signedness.  Adjust the result.  */
3664         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3665                                             tem, unsignedp);
3666     }
3667
3668   /* Try widening multiplication.  */
3669   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3670   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3671       && mul_widen_cost (speed, wider_mode) < max_cost)
3672     {
3673       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3674                           unsignedp, OPTAB_WIDEN);
3675       if (tem)
3676         return extract_high_half (mode, tem);
3677     }
3678
3679   /* Try widening the mode and perform a non-widening multiplication.  */
3680   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3681       && size - 1 < BITS_PER_WORD
3682       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3683           < max_cost))
3684     {
3685       rtx_insn *insns;
3686       rtx wop0, wop1;
3687
3688       /* We need to widen the operands, for example to ensure the
3689          constant multiplier is correctly sign or zero extended.
3690          Use a sequence to clean-up any instructions emitted by
3691          the conversions if things don't work out.  */
3692       start_sequence ();
3693       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3694       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3695       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3696                           unsignedp, OPTAB_WIDEN);
3697       insns = get_insns ();
3698       end_sequence ();
3699
3700       if (tem)
3701         {
3702           emit_insn (insns);
3703           return extract_high_half (mode, tem);
3704         }
3705     }
3706
3707   /* Try widening multiplication of opposite signedness, and adjust.  */
3708   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3709   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3710       && size - 1 < BITS_PER_WORD
3711       && (mul_widen_cost (speed, wider_mode)
3712           + 2 * shift_cost (speed, mode, size-1)
3713           + 4 * add_cost (speed, mode) < max_cost))
3714     {
3715       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3716                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3717       if (tem != 0)
3718         {
3719           tem = extract_high_half (mode, tem);
3720           /* We used the wrong signedness.  Adjust the result.  */
3721           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3722                                               target, unsignedp);
3723         }
3724     }
3725
3726   return 0;
3727 }
3728
3729 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3730    putting the high half of the result in TARGET if that is convenient,
3731    and return where the result is.  If the operation can not be performed,
3732    0 is returned.
3733
3734    MODE is the mode of operation and result.
3735
3736    UNSIGNEDP nonzero means unsigned multiply.
3737
3738    MAX_COST is the total allowed cost for the expanded RTL.  */
3739
3740 static rtx
3741 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3742                       rtx target, int unsignedp, int max_cost)
3743 {
3744   machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3745   unsigned HOST_WIDE_INT cnst1;
3746   int extra_cost;
3747   bool sign_adjust = false;
3748   enum mult_variant variant;
3749   struct algorithm alg;
3750   rtx tem;
3751   bool speed = optimize_insn_for_speed_p ();
3752
3753   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3754   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3755   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3756
3757   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3758
3759   /* We can't optimize modes wider than BITS_PER_WORD.
3760      ??? We might be able to perform double-word arithmetic if
3761      mode == word_mode, however all the cost calculations in
3762      synth_mult etc. assume single-word operations.  */
3763   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3764     return expmed_mult_highpart_optab (mode, op0, op1, target,
3765                                        unsignedp, max_cost);
3766
3767   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3768
3769   /* Check whether we try to multiply by a negative constant.  */
3770   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3771     {
3772       sign_adjust = true;
3773       extra_cost += add_cost (speed, mode);
3774     }
3775
3776   /* See whether shift/add multiplication is cheap enough.  */
3777   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3778                            max_cost - extra_cost))
3779     {
3780       /* See whether the specialized multiplication optabs are
3781          cheaper than the shift/add version.  */
3782       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3783                                         alg.cost.cost + extra_cost);
3784       if (tem)
3785         return tem;
3786
3787       tem = convert_to_mode (wider_mode, op0, unsignedp);
3788       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3789       tem = extract_high_half (mode, tem);
3790
3791       /* Adjust result for signedness.  */
3792       if (sign_adjust)
3793         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3794
3795       return tem;
3796     }
3797   return expmed_mult_highpart_optab (mode, op0, op1, target,
3798                                      unsignedp, max_cost);
3799 }
3800
3801
3802 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3803
3804 static rtx
3805 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3806 {
3807   rtx result, temp, shift;
3808   rtx_code_label *label;
3809   int logd;
3810   int prec = GET_MODE_PRECISION (mode);
3811
3812   logd = floor_log2 (d);
3813   result = gen_reg_rtx (mode);
3814
3815   /* Avoid conditional branches when they're expensive.  */
3816   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3817       && optimize_insn_for_speed_p ())
3818     {
3819       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3820                                       mode, 0, -1);
3821       if (signmask)
3822         {
3823           HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3824           signmask = force_reg (mode, signmask);
3825           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3826
3827           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3828              which instruction sequence to use.  If logical right shifts
3829              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3830              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3831
3832           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3833           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3834               || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3835                   > COSTS_N_INSNS (2)))
3836             {
3837               temp = expand_binop (mode, xor_optab, op0, signmask,
3838                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3839               temp = expand_binop (mode, sub_optab, temp, signmask,
3840                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3841               temp = expand_binop (mode, and_optab, temp,
3842                                    gen_int_mode (masklow, mode),
3843                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3844               temp = expand_binop (mode, xor_optab, temp, signmask,
3845                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3846               temp = expand_binop (mode, sub_optab, temp, signmask,
3847                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3848             }
3849           else
3850             {
3851               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3852                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3853               signmask = force_reg (mode, signmask);
3854
3855               temp = expand_binop (mode, add_optab, op0, signmask,
3856                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3857               temp = expand_binop (mode, and_optab, temp,
3858                                    gen_int_mode (masklow, mode),
3859                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3860               temp = expand_binop (mode, sub_optab, temp, signmask,
3861                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3862             }
3863           return temp;
3864         }
3865     }
3866
3867   /* Mask contains the mode's signbit and the significant bits of the
3868      modulus.  By including the signbit in the operation, many targets
3869      can avoid an explicit compare operation in the following comparison
3870      against zero.  */
3871   wide_int mask = wi::mask (logd, false, prec);
3872   mask = wi::set_bit (mask, prec - 1);
3873
3874   temp = expand_binop (mode, and_optab, op0,
3875                        immed_wide_int_const (mask, mode),
3876                        result, 1, OPTAB_LIB_WIDEN);
3877   if (temp != result)
3878     emit_move_insn (result, temp);
3879
3880   label = gen_label_rtx ();
3881   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3882
3883   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3884                        0, OPTAB_LIB_WIDEN);
3885
3886   mask = wi::mask (logd, true, prec);
3887   temp = expand_binop (mode, ior_optab, temp,
3888                        immed_wide_int_const (mask, mode),
3889                        result, 1, OPTAB_LIB_WIDEN);
3890   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3891                        0, OPTAB_LIB_WIDEN);
3892   if (temp != result)
3893     emit_move_insn (result, temp);
3894   emit_label (label);
3895   return result;
3896 }
3897
3898 /* Expand signed division of OP0 by a power of two D in mode MODE.
3899    This routine is only called for positive values of D.  */
3900
3901 static rtx
3902 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3903 {
3904   rtx temp;
3905   rtx_code_label *label;
3906   int logd;
3907
3908   logd = floor_log2 (d);
3909
3910   if (d == 2
3911       && BRANCH_COST (optimize_insn_for_speed_p (),
3912                       false) >= 1)
3913     {
3914       temp = gen_reg_rtx (mode);
3915       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3916       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3917                            0, OPTAB_LIB_WIDEN);
3918       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3919     }
3920
3921   if (HAVE_conditional_move
3922       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3923     {
3924       rtx temp2;
3925
3926       start_sequence ();
3927       temp2 = copy_to_mode_reg (mode, op0);
3928       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3929                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3930       temp = force_reg (mode, temp);
3931
3932       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3933       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3934                                      mode, temp, temp2, mode, 0);
3935       if (temp2)
3936         {
3937           rtx_insn *seq = get_insns ();
3938           end_sequence ();
3939           emit_insn (seq);
3940           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3941         }
3942       end_sequence ();
3943     }
3944
3945   if (BRANCH_COST (optimize_insn_for_speed_p (),
3946                    false) >= 2)
3947     {
3948       int ushift = GET_MODE_BITSIZE (mode) - logd;
3949
3950       temp = gen_reg_rtx (mode);
3951       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3952       if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3953           || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3954              > COSTS_N_INSNS (1))
3955         temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3956                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3957       else
3958         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3959                              ushift, NULL_RTX, 1);
3960       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3961                            0, OPTAB_LIB_WIDEN);
3962       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3963     }
3964
3965   label = gen_label_rtx ();
3966   temp = copy_to_mode_reg (mode, op0);
3967   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3968   expand_inc (temp, gen_int_mode (d - 1, mode));
3969   emit_label (label);
3970   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3971 }
3972 \f
3973 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3974    if that is convenient, and returning where the result is.
3975    You may request either the quotient or the remainder as the result;
3976    specify REM_FLAG nonzero to get the remainder.
3977
3978    CODE is the expression code for which kind of division this is;
3979    it controls how rounding is done.  MODE is the machine mode to use.
3980    UNSIGNEDP nonzero means do unsigned division.  */
3981
3982 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3983    and then correct it by or'ing in missing high bits
3984    if result of ANDI is nonzero.
3985    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3986    This could optimize to a bfexts instruction.
3987    But C doesn't use these operations, so their optimizations are
3988    left for later.  */
3989 /* ??? For modulo, we don't actually need the highpart of the first product,
3990    the low part will do nicely.  And for small divisors, the second multiply
3991    can also be a low-part only multiply or even be completely left out.
3992    E.g. to calculate the remainder of a division by 3 with a 32 bit
3993    multiply, multiply with 0x55555556 and extract the upper two bits;
3994    the result is exact for inputs up to 0x1fffffff.
3995    The input range can be reduced by using cross-sum rules.
3996    For odd divisors >= 3, the following table gives right shift counts
3997    so that if a number is shifted by an integer multiple of the given
3998    amount, the remainder stays the same:
3999    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4000    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4001    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4002    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4003    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4004
4005    Cross-sum rules for even numbers can be derived by leaving as many bits
4006    to the right alone as the divisor has zeros to the right.
4007    E.g. if x is an unsigned 32 bit number:
4008    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4009    */
4010
4011 rtx
4012 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4013                rtx op0, rtx op1, rtx target, int unsignedp)
4014 {
4015   machine_mode compute_mode;
4016   rtx tquotient;
4017   rtx quotient = 0, remainder = 0;
4018   rtx_insn *last;
4019   int size;
4020   rtx_insn *insn;
4021   optab optab1, optab2;
4022   int op1_is_constant, op1_is_pow2 = 0;
4023   int max_cost, extra_cost;
4024   static HOST_WIDE_INT last_div_const = 0;
4025   bool speed = optimize_insn_for_speed_p ();
4026
4027   op1_is_constant = CONST_INT_P (op1);
4028   if (op1_is_constant)
4029     {
4030       unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
4031       if (unsignedp)
4032         ext_op1 &= GET_MODE_MASK (mode);
4033       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
4034                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
4035     }
4036
4037   /*
4038      This is the structure of expand_divmod:
4039
4040      First comes code to fix up the operands so we can perform the operations
4041      correctly and efficiently.
4042
4043      Second comes a switch statement with code specific for each rounding mode.
4044      For some special operands this code emits all RTL for the desired
4045      operation, for other cases, it generates only a quotient and stores it in
4046      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4047      to indicate that it has not done anything.
4048
4049      Last comes code that finishes the operation.  If QUOTIENT is set and
4050      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4051      QUOTIENT is not set, it is computed using trunc rounding.
4052
4053      We try to generate special code for division and remainder when OP1 is a
4054      constant.  If |OP1| = 2**n we can use shifts and some other fast
4055      operations.  For other values of OP1, we compute a carefully selected
4056      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4057      by m.
4058
4059      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4060      half of the product.  Different strategies for generating the product are
4061      implemented in expmed_mult_highpart.
4062
4063      If what we actually want is the remainder, we generate that by another
4064      by-constant multiplication and a subtraction.  */
4065
4066   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4067      code below will malfunction if we are, so check here and handle
4068      the special case if so.  */
4069   if (op1 == const1_rtx)
4070     return rem_flag ? const0_rtx : op0;
4071
4072     /* When dividing by -1, we could get an overflow.
4073      negv_optab can handle overflows.  */
4074   if (! unsignedp && op1 == constm1_rtx)
4075     {
4076       if (rem_flag)
4077         return const0_rtx;
4078       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4079                           ? negv_optab : neg_optab, op0, target, 0);
4080     }
4081
4082   if (target
4083       /* Don't use the function value register as a target
4084          since we have to read it as well as write it,
4085          and function-inlining gets confused by this.  */
4086       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4087           /* Don't clobber an operand while doing a multi-step calculation.  */
4088           || ((rem_flag || op1_is_constant)
4089               && (reg_mentioned_p (target, op0)
4090                   || (MEM_P (op0) && MEM_P (target))))
4091           || reg_mentioned_p (target, op1)
4092           || (MEM_P (op1) && MEM_P (target))))
4093     target = 0;
4094
4095   /* Get the mode in which to perform this computation.  Normally it will
4096      be MODE, but sometimes we can't do the desired operation in MODE.
4097      If so, pick a wider mode in which we can do the operation.  Convert
4098      to that mode at the start to avoid repeated conversions.
4099
4100      First see what operations we need.  These depend on the expression
4101      we are evaluating.  (We assume that divxx3 insns exist under the
4102      same conditions that modxx3 insns and that these insns don't normally
4103      fail.  If these assumptions are not correct, we may generate less
4104      efficient code in some cases.)
4105
4106      Then see if we find a mode in which we can open-code that operation
4107      (either a division, modulus, or shift).  Finally, check for the smallest
4108      mode for which we can do the operation with a library call.  */
4109
4110   /* We might want to refine this now that we have division-by-constant
4111      optimization.  Since expmed_mult_highpart tries so many variants, it is
4112      not straightforward to generalize this.  Maybe we should make an array
4113      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4114
4115   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
4116             ? (unsignedp ? lshr_optab : ashr_optab)
4117             : (unsignedp ? udiv_optab : sdiv_optab));
4118   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
4119             ? optab1
4120             : (unsignedp ? udivmod_optab : sdivmod_optab));
4121
4122   for (compute_mode = mode; compute_mode != VOIDmode;
4123        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4124     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4125         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4126       break;
4127
4128   if (compute_mode == VOIDmode)
4129     for (compute_mode = mode; compute_mode != VOIDmode;
4130          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4131       if (optab_libfunc (optab1, compute_mode)
4132           || optab_libfunc (optab2, compute_mode))
4133         break;
4134
4135   /* If we still couldn't find a mode, use MODE, but expand_binop will
4136      probably die.  */
4137   if (compute_mode == VOIDmode)
4138     compute_mode = mode;
4139
4140   if (target && GET_MODE (target) == compute_mode)
4141     tquotient = target;
4142   else
4143     tquotient = gen_reg_rtx (compute_mode);
4144
4145   size = GET_MODE_BITSIZE (compute_mode);
4146 #if 0
4147   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4148      (mode), and thereby get better code when OP1 is a constant.  Do that
4149      later.  It will require going over all usages of SIZE below.  */
4150   size = GET_MODE_BITSIZE (mode);
4151 #endif
4152
4153   /* Only deduct something for a REM if the last divide done was
4154      for a different constant.   Then set the constant of the last
4155      divide.  */
4156   max_cost = (unsignedp
4157               ? udiv_cost (speed, compute_mode)
4158               : sdiv_cost (speed, compute_mode));
4159   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4160                      && INTVAL (op1) == last_div_const))
4161     max_cost -= (mul_cost (speed, compute_mode)
4162                  + add_cost (speed, compute_mode));
4163
4164   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4165
4166   /* Now convert to the best mode to use.  */
4167   if (compute_mode != mode)
4168     {
4169       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4170       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4171
4172       /* convert_modes may have placed op1 into a register, so we
4173          must recompute the following.  */
4174       op1_is_constant = CONST_INT_P (op1);
4175       op1_is_pow2 = (op1_is_constant
4176                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4177                           || (! unsignedp
4178                               && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4179     }
4180
4181   /* If one of the operands is a volatile MEM, copy it into a register.  */
4182
4183   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4184     op0 = force_reg (compute_mode, op0);
4185   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4186     op1 = force_reg (compute_mode, op1);
4187
4188   /* If we need the remainder or if OP1 is constant, we need to
4189      put OP0 in a register in case it has any queued subexpressions.  */
4190   if (rem_flag || op1_is_constant)
4191     op0 = force_reg (compute_mode, op0);
4192
4193   last = get_last_insn ();
4194
4195   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4196   if (unsignedp)
4197     {
4198       if (code == FLOOR_DIV_EXPR)
4199         code = TRUNC_DIV_EXPR;
4200       if (code == FLOOR_MOD_EXPR)
4201         code = TRUNC_MOD_EXPR;
4202       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4203         code = TRUNC_DIV_EXPR;
4204     }
4205
4206   if (op1 != const0_rtx)
4207     switch (code)
4208       {
4209       case TRUNC_MOD_EXPR:
4210       case TRUNC_DIV_EXPR:
4211         if (op1_is_constant)
4212           {
4213             if (unsignedp)
4214               {
4215                 unsigned HOST_WIDE_INT mh, ml;
4216                 int pre_shift, post_shift;
4217                 int dummy;
4218                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4219                                             & GET_MODE_MASK (compute_mode));
4220
4221                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4222                   {
4223                     pre_shift = floor_log2 (d);
4224                     if (rem_flag)
4225                       {
4226                         unsigned HOST_WIDE_INT mask
4227                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4228                         remainder
4229                           = expand_binop (compute_mode, and_optab, op0,
4230                                           gen_int_mode (mask, compute_mode),
4231                                           remainder, 1,
4232                                           OPTAB_LIB_WIDEN);
4233                         if (remainder)
4234                           return gen_lowpart (mode, remainder);
4235                       }
4236                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4237                                              pre_shift, tquotient, 1);
4238                   }
4239                 else if (size <= HOST_BITS_PER_WIDE_INT)
4240                   {
4241                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4242                       {
4243                         /* Most significant bit of divisor is set; emit an scc
4244                            insn.  */
4245                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4246                                                           compute_mode, 1, 1);
4247                       }
4248                     else
4249                       {
4250                         /* Find a suitable multiplier and right shift count
4251                            instead of multiplying with D.  */
4252
4253                         mh = choose_multiplier (d, size, size,
4254                                                 &ml, &post_shift, &dummy);
4255
4256                         /* If the suggested multiplier is more than SIZE bits,
4257                            we can do better for even divisors, using an
4258                            initial right shift.  */
4259                         if (mh != 0 && (d & 1) == 0)
4260                           {
4261                             pre_shift = floor_log2 (d & -d);
4262                             mh = choose_multiplier (d >> pre_shift, size,
4263                                                     size - pre_shift,
4264                                                     &ml, &post_shift, &dummy);
4265                             gcc_assert (!mh);
4266                           }
4267                         else
4268                           pre_shift = 0;
4269
4270                         if (mh != 0)
4271                           {
4272                             rtx t1, t2, t3, t4;
4273
4274                             if (post_shift - 1 >= BITS_PER_WORD)
4275                               goto fail1;
4276
4277                             extra_cost
4278                               = (shift_cost (speed, compute_mode, post_shift - 1)
4279                                  + shift_cost (speed, compute_mode, 1)
4280                                  + 2 * add_cost (speed, compute_mode));
4281                             t1 = expmed_mult_highpart
4282                               (compute_mode, op0,
4283                                gen_int_mode (ml, compute_mode),
4284                                NULL_RTX, 1, max_cost - extra_cost);
4285                             if (t1 == 0)
4286                               goto fail1;
4287                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4288                                                                op0, t1),
4289                                                 NULL_RTX);
4290                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4291                                                t2, 1, NULL_RTX, 1);
4292                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4293                                                               t1, t3),
4294                                                 NULL_RTX);
4295                             quotient = expand_shift
4296                               (RSHIFT_EXPR, compute_mode, t4,
4297                                post_shift - 1, tquotient, 1);
4298                           }
4299                         else
4300                           {
4301                             rtx t1, t2;
4302
4303                             if (pre_shift >= BITS_PER_WORD
4304                                 || post_shift >= BITS_PER_WORD)
4305                               goto fail1;
4306
4307                             t1 = expand_shift
4308                               (RSHIFT_EXPR, compute_mode, op0,
4309                                pre_shift, NULL_RTX, 1);
4310                             extra_cost
4311                               = (shift_cost (speed, compute_mode, pre_shift)
4312                                  + shift_cost (speed, compute_mode, post_shift));
4313                             t2 = expmed_mult_highpart
4314                               (compute_mode, t1,
4315                                gen_int_mode (ml, compute_mode),
4316                                NULL_RTX, 1, max_cost - extra_cost);
4317                             if (t2 == 0)
4318                               goto fail1;
4319                             quotient = expand_shift
4320                               (RSHIFT_EXPR, compute_mode, t2,
4321                                post_shift, tquotient, 1);
4322                           }
4323                       }
4324                   }
4325                 else            /* Too wide mode to use tricky code */
4326                   break;
4327
4328                 insn = get_last_insn ();
4329                 if (insn != last)
4330                   set_dst_reg_note (insn, REG_EQUAL,
4331                                     gen_rtx_UDIV (compute_mode, op0, op1),
4332                                     quotient);
4333               }
4334             else                /* TRUNC_DIV, signed */
4335               {
4336                 unsigned HOST_WIDE_INT ml;
4337                 int lgup, post_shift;
4338                 rtx mlr;
4339                 HOST_WIDE_INT d = INTVAL (op1);
4340                 unsigned HOST_WIDE_INT abs_d;
4341
4342                 /* Since d might be INT_MIN, we have to cast to
4343                    unsigned HOST_WIDE_INT before negating to avoid
4344                    undefined signed overflow.  */
4345                 abs_d = (d >= 0
4346                          ? (unsigned HOST_WIDE_INT) d
4347                          : - (unsigned HOST_WIDE_INT) d);
4348
4349                 /* n rem d = n rem -d */
4350                 if (rem_flag && d < 0)
4351                   {
4352                     d = abs_d;
4353                     op1 = gen_int_mode (abs_d, compute_mode);
4354                   }
4355
4356                 if (d == 1)
4357                   quotient = op0;
4358                 else if (d == -1)
4359                   quotient = expand_unop (compute_mode, neg_optab, op0,
4360                                           tquotient, 0);
4361                 else if (HOST_BITS_PER_WIDE_INT >= size
4362                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4363                   {
4364                     /* This case is not handled correctly below.  */
4365                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4366                                                 compute_mode, 1, 1);
4367                     if (quotient == 0)
4368                       goto fail1;
4369                   }
4370                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4371                          && (rem_flag
4372                              ? smod_pow2_cheap (speed, compute_mode)
4373                              : sdiv_pow2_cheap (speed, compute_mode))
4374                          /* We assume that cheap metric is true if the
4375                             optab has an expander for this mode.  */
4376                          && ((optab_handler ((rem_flag ? smod_optab
4377                                               : sdiv_optab),
4378                                              compute_mode)
4379                               != CODE_FOR_nothing)
4380                              || (optab_handler (sdivmod_optab,
4381                                                 compute_mode)
4382                                  != CODE_FOR_nothing)))
4383                   ;
4384                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4385                   {
4386                     if (rem_flag)
4387                       {
4388                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4389                         if (remainder)
4390                           return gen_lowpart (mode, remainder);
4391                       }
4392
4393                     if (sdiv_pow2_cheap (speed, compute_mode)
4394                         && ((optab_handler (sdiv_optab, compute_mode)
4395                              != CODE_FOR_nothing)
4396                             || (optab_handler (sdivmod_optab, compute_mode)
4397                                 != CODE_FOR_nothing)))
4398                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4399                                                 compute_mode, op0,
4400                                                 gen_int_mode (abs_d,
4401                                                               compute_mode),
4402                                                 NULL_RTX, 0);
4403                     else
4404                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4405
4406                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4407                        negate the quotient.  */
4408                     if (d < 0)
4409                       {
4410                         insn = get_last_insn ();
4411                         if (insn != last
4412                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4413                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4414                           set_dst_reg_note (insn, REG_EQUAL,
4415                                             gen_rtx_DIV (compute_mode, op0,
4416                                                          gen_int_mode
4417                                                            (abs_d,
4418                                                             compute_mode)),
4419                                             quotient);
4420
4421                         quotient = expand_unop (compute_mode, neg_optab,
4422                                                 quotient, quotient, 0);
4423                       }
4424                   }
4425                 else if (size <= HOST_BITS_PER_WIDE_INT)
4426                   {
4427                     choose_multiplier (abs_d, size, size - 1,
4428                                        &ml, &post_shift, &lgup);
4429                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4430                       {
4431                         rtx t1, t2, t3;
4432
4433                         if (post_shift >= BITS_PER_WORD
4434                             || size - 1 >= BITS_PER_WORD)
4435                           goto fail1;
4436
4437                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4438                                       + shift_cost (speed, compute_mode, size - 1)
4439                                       + add_cost (speed, compute_mode));
4440                         t1 = expmed_mult_highpart
4441                           (compute_mode, op0, gen_int_mode (ml, compute_mode),
4442                            NULL_RTX, 0, max_cost - extra_cost);
4443                         if (t1 == 0)
4444                           goto fail1;
4445                         t2 = expand_shift
4446                           (RSHIFT_EXPR, compute_mode, t1,
4447                            post_shift, NULL_RTX, 0);
4448                         t3 = expand_shift
4449                           (RSHIFT_EXPR, compute_mode, op0,
4450                            size - 1, NULL_RTX, 0);
4451                         if (d < 0)
4452                           quotient
4453                             = force_operand (gen_rtx_MINUS (compute_mode,
4454                                                             t3, t2),
4455                                              tquotient);
4456                         else
4457                           quotient
4458                             = force_operand (gen_rtx_MINUS (compute_mode,
4459                                                             t2, t3),
4460                                              tquotient);
4461                       }
4462                     else
4463                       {
4464                         rtx t1, t2, t3, t4;
4465
4466                         if (post_shift >= BITS_PER_WORD
4467                             || size - 1 >= BITS_PER_WORD)
4468                           goto fail1;
4469
4470                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4471                         mlr = gen_int_mode (ml, compute_mode);
4472                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4473                                       + shift_cost (speed, compute_mode, size - 1)
4474                                       + 2 * add_cost (speed, compute_mode));
4475                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4476                                                    NULL_RTX, 0,
4477                                                    max_cost - extra_cost);
4478                         if (t1 == 0)
4479                           goto fail1;
4480                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4481                                                           t1, op0),
4482                                             NULL_RTX);
4483                         t3 = expand_shift
4484                           (RSHIFT_EXPR, compute_mode, t2,
4485                            post_shift, NULL_RTX, 0);
4486                         t4 = expand_shift
4487                           (RSHIFT_EXPR, compute_mode, op0,
4488                            size - 1, NULL_RTX, 0);
4489                         if (d < 0)
4490                           quotient
4491                             = force_operand (gen_rtx_MINUS (compute_mode,
4492                                                             t4, t3),
4493                                              tquotient);
4494                         else
4495                           quotient
4496                             = force_operand (gen_rtx_MINUS (compute_mode,
4497                                                             t3, t4),
4498                                              tquotient);
4499                       }
4500                   }
4501                 else            /* Too wide mode to use tricky code */
4502                   break;
4503
4504                 insn = get_last_insn ();
4505                 if (insn != last)
4506                   set_dst_reg_note (insn, REG_EQUAL,
4507                                     gen_rtx_DIV (compute_mode, op0, op1),
4508                                     quotient);
4509               }
4510             break;
4511           }
4512       fail1:
4513         delete_insns_since (last);
4514         break;
4515
4516       case FLOOR_DIV_EXPR:
4517       case FLOOR_MOD_EXPR:
4518       /* We will come here only for signed operations.  */
4519         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4520           {
4521             unsigned HOST_WIDE_INT mh, ml;
4522             int pre_shift, lgup, post_shift;
4523             HOST_WIDE_INT d = INTVAL (op1);
4524
4525             if (d > 0)
4526               {
4527                 /* We could just as easily deal with negative constants here,
4528                    but it does not seem worth the trouble for GCC 2.6.  */
4529                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4530                   {
4531                     pre_shift = floor_log2 (d);
4532                     if (rem_flag)
4533                       {
4534                         unsigned HOST_WIDE_INT mask
4535                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4536                         remainder = expand_binop
4537                           (compute_mode, and_optab, op0,
4538                            gen_int_mode (mask, compute_mode),
4539                            remainder, 0, OPTAB_LIB_WIDEN);
4540                         if (remainder)
4541                           return gen_lowpart (mode, remainder);
4542                       }
4543                     quotient = expand_shift
4544                       (RSHIFT_EXPR, compute_mode, op0,
4545                        pre_shift, tquotient, 0);
4546                   }
4547                 else
4548                   {
4549                     rtx t1, t2, t3, t4;
4550
4551                     mh = choose_multiplier (d, size, size - 1,
4552                                             &ml, &post_shift, &lgup);
4553                     gcc_assert (!mh);
4554
4555                     if (post_shift < BITS_PER_WORD
4556                         && size - 1 < BITS_PER_WORD)
4557                       {
4558                         t1 = expand_shift
4559                           (RSHIFT_EXPR, compute_mode, op0,
4560                            size - 1, NULL_RTX, 0);
4561                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4562                                            NULL_RTX, 0, OPTAB_WIDEN);
4563                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4564                                       + shift_cost (speed, compute_mode, size - 1)
4565                                       + 2 * add_cost (speed, compute_mode));
4566                         t3 = expmed_mult_highpart
4567                           (compute_mode, t2, gen_int_mode (ml, compute_mode),
4568                            NULL_RTX, 1, max_cost - extra_cost);
4569                         if (t3 != 0)
4570                           {
4571                             t4 = expand_shift
4572                               (RSHIFT_EXPR, compute_mode, t3,
4573                                post_shift, NULL_RTX, 1);
4574                             quotient = expand_binop (compute_mode, xor_optab,
4575                                                      t4, t1, tquotient, 0,
4576                                                      OPTAB_WIDEN);
4577                           }
4578                       }
4579                   }
4580               }
4581             else
4582               {
4583                 rtx nsign, t1, t2, t3, t4;
4584                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4585                                                   op0, constm1_rtx), NULL_RTX);
4586                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4587                                    0, OPTAB_WIDEN);
4588                 nsign = expand_shift
4589                   (RSHIFT_EXPR, compute_mode, t2,
4590                    size - 1, NULL_RTX, 0);
4591                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4592                                     NULL_RTX);
4593                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4594                                     NULL_RTX, 0);
4595                 if (t4)
4596                   {
4597                     rtx t5;
4598                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4599                                       NULL_RTX, 0);
4600                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4601                                                             t4, t5),
4602                                               tquotient);
4603                   }
4604               }
4605           }
4606
4607         if (quotient != 0)
4608           break;
4609         delete_insns_since (last);
4610
4611         /* Try using an instruction that produces both the quotient and
4612            remainder, using truncation.  We can easily compensate the quotient
4613            or remainder to get floor rounding, once we have the remainder.
4614            Notice that we compute also the final remainder value here,
4615            and return the result right away.  */
4616         if (target == 0 || GET_MODE (target) != compute_mode)
4617           target = gen_reg_rtx (compute_mode);
4618
4619         if (rem_flag)
4620           {
4621             remainder
4622               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4623             quotient = gen_reg_rtx (compute_mode);
4624           }
4625         else
4626           {
4627             quotient
4628               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4629             remainder = gen_reg_rtx (compute_mode);
4630           }
4631
4632         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4633                                  quotient, remainder, 0))
4634           {
4635             /* This could be computed with a branch-less sequence.
4636                Save that for later.  */
4637             rtx tem;
4638             rtx_code_label *label = gen_label_rtx ();
4639             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4640             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4641                                 NULL_RTX, 0, OPTAB_WIDEN);
4642             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4643             expand_dec (quotient, const1_rtx);
4644             expand_inc (remainder, op1);
4645             emit_label (label);
4646             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4647           }
4648
4649         /* No luck with division elimination or divmod.  Have to do it
4650            by conditionally adjusting op0 *and* the result.  */
4651         {
4652           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4653           rtx adjusted_op0;
4654           rtx tem;
4655
4656           quotient = gen_reg_rtx (compute_mode);
4657           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4658           label1 = gen_label_rtx ();
4659           label2 = gen_label_rtx ();
4660           label3 = gen_label_rtx ();
4661           label4 = gen_label_rtx ();
4662           label5 = gen_label_rtx ();
4663           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4664           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4665           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4666                               quotient, 0, OPTAB_LIB_WIDEN);
4667           if (tem != quotient)
4668             emit_move_insn (quotient, tem);
4669           emit_jump_insn (targetm.gen_jump (label5));
4670           emit_barrier ();
4671           emit_label (label1);
4672           expand_inc (adjusted_op0, const1_rtx);
4673           emit_jump_insn (targetm.gen_jump (label4));
4674           emit_barrier ();
4675           emit_label (label2);
4676           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4677           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4678                               quotient, 0, OPTAB_LIB_WIDEN);
4679           if (tem != quotient)
4680             emit_move_insn (quotient, tem);
4681           emit_jump_insn (targetm.gen_jump (label5));
4682           emit_barrier ();
4683           emit_label (label3);
4684           expand_dec (adjusted_op0, const1_rtx);
4685           emit_label (label4);
4686           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4687                               quotient, 0, OPTAB_LIB_WIDEN);
4688           if (tem != quotient)
4689             emit_move_insn (quotient, tem);
4690           expand_dec (quotient, const1_rtx);
4691           emit_label (label5);
4692         }
4693         break;
4694
4695       case CEIL_DIV_EXPR:
4696       case CEIL_MOD_EXPR:
4697         if (unsignedp)
4698           {
4699             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4700               {
4701                 rtx t1, t2, t3;
4702                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4703                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4704                                    floor_log2 (d), tquotient, 1);
4705                 t2 = expand_binop (compute_mode, and_optab, op0,
4706                                    gen_int_mode (d - 1, compute_mode),
4707                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4708                 t3 = gen_reg_rtx (compute_mode);
4709                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4710                                       compute_mode, 1, 1);
4711                 if (t3 == 0)
4712                   {
4713                     rtx_code_label *lab;
4714                     lab = gen_label_rtx ();
4715                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4716                     expand_inc (t1, const1_rtx);
4717                     emit_label (lab);
4718                     quotient = t1;
4719                   }
4720                 else
4721                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4722                                                           t1, t3),
4723                                             tquotient);
4724                 break;
4725               }
4726
4727             /* Try using an instruction that produces both the quotient and
4728                remainder, using truncation.  We can easily compensate the
4729                quotient or remainder to get ceiling rounding, once we have the
4730                remainder.  Notice that we compute also the final remainder
4731                value here, and return the result right away.  */
4732             if (target == 0 || GET_MODE (target) != compute_mode)
4733               target = gen_reg_rtx (compute_mode);
4734
4735             if (rem_flag)
4736               {
4737                 remainder = (REG_P (target)
4738                              ? target : gen_reg_rtx (compute_mode));
4739                 quotient = gen_reg_rtx (compute_mode);
4740               }
4741             else
4742               {
4743                 quotient = (REG_P (target)
4744                             ? target : gen_reg_rtx (compute_mode));
4745                 remainder = gen_reg_rtx (compute_mode);
4746               }
4747
4748             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4749                                      remainder, 1))
4750               {
4751                 /* This could be computed with a branch-less sequence.
4752                    Save that for later.  */
4753                 rtx_code_label *label = gen_label_rtx ();
4754                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4755                                  compute_mode, label);
4756                 expand_inc (quotient, const1_rtx);
4757                 expand_dec (remainder, op1);
4758                 emit_label (label);
4759                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4760               }
4761
4762             /* No luck with division elimination or divmod.  Have to do it
4763                by conditionally adjusting op0 *and* the result.  */
4764             {
4765               rtx_code_label *label1, *label2;
4766               rtx adjusted_op0, tem;
4767
4768               quotient = gen_reg_rtx (compute_mode);
4769               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4770               label1 = gen_label_rtx ();
4771               label2 = gen_label_rtx ();
4772               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4773                                compute_mode, label1);
4774               emit_move_insn  (quotient, const0_rtx);
4775               emit_jump_insn (targetm.gen_jump (label2));
4776               emit_barrier ();
4777               emit_label (label1);
4778               expand_dec (adjusted_op0, const1_rtx);
4779               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4780                                   quotient, 1, OPTAB_LIB_WIDEN);
4781               if (tem != quotient)
4782                 emit_move_insn (quotient, tem);
4783               expand_inc (quotient, const1_rtx);
4784               emit_label (label2);
4785             }
4786           }
4787         else /* signed */
4788           {
4789             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4790                 && INTVAL (op1) >= 0)
4791               {
4792                 /* This is extremely similar to the code for the unsigned case
4793                    above.  For 2.7 we should merge these variants, but for
4794                    2.6.1 I don't want to touch the code for unsigned since that
4795                    get used in C.  The signed case will only be used by other
4796                    languages (Ada).  */
4797
4798                 rtx t1, t2, t3;
4799                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4800                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4801                                    floor_log2 (d), tquotient, 0);
4802                 t2 = expand_binop (compute_mode, and_optab, op0,
4803                                    gen_int_mode (d - 1, compute_mode),
4804                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4805                 t3 = gen_reg_rtx (compute_mode);
4806                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4807                                       compute_mode, 1, 1);
4808                 if (t3 == 0)
4809                   {
4810                     rtx_code_label *lab;
4811                     lab = gen_label_rtx ();
4812                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4813                     expand_inc (t1, const1_rtx);
4814                     emit_label (lab);
4815                     quotient = t1;
4816                   }
4817                 else
4818                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4819                                                           t1, t3),
4820                                             tquotient);
4821                 break;
4822               }
4823
4824             /* Try using an instruction that produces both the quotient and
4825                remainder, using truncation.  We can easily compensate the
4826                quotient or remainder to get ceiling rounding, once we have the
4827                remainder.  Notice that we compute also the final remainder
4828                value here, and return the result right away.  */
4829             if (target == 0 || GET_MODE (target) != compute_mode)
4830               target = gen_reg_rtx (compute_mode);
4831             if (rem_flag)
4832               {
4833                 remainder= (REG_P (target)
4834                             ? target : gen_reg_rtx (compute_mode));
4835                 quotient = gen_reg_rtx (compute_mode);
4836               }
4837             else
4838               {
4839                 quotient = (REG_P (target)
4840                             ? target : gen_reg_rtx (compute_mode));
4841                 remainder = gen_reg_rtx (compute_mode);
4842               }
4843
4844             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4845                                      remainder, 0))
4846               {
4847                 /* This could be computed with a branch-less sequence.
4848                    Save that for later.  */
4849                 rtx tem;
4850                 rtx_code_label *label = gen_label_rtx ();
4851                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4852                                  compute_mode, label);
4853                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4854                                     NULL_RTX, 0, OPTAB_WIDEN);
4855                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4856                 expand_inc (quotient, const1_rtx);
4857                 expand_dec (remainder, op1);
4858                 emit_label (label);
4859                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4860               }
4861
4862             /* No luck with division elimination or divmod.  Have to do it
4863                by conditionally adjusting op0 *and* the result.  */
4864             {
4865               rtx_code_label *label1, *label2, *label3, *label4, *label5;
4866               rtx adjusted_op0;
4867               rtx tem;
4868
4869               quotient = gen_reg_rtx (compute_mode);
4870               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4871               label1 = gen_label_rtx ();
4872               label2 = gen_label_rtx ();
4873               label3 = gen_label_rtx ();
4874               label4 = gen_label_rtx ();
4875               label5 = gen_label_rtx ();
4876               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4877               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4878                                compute_mode, label1);
4879               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4880                                   quotient, 0, OPTAB_LIB_WIDEN);
4881               if (tem != quotient)
4882                 emit_move_insn (quotient, tem);
4883               emit_jump_insn (targetm.gen_jump (label5));
4884               emit_barrier ();
4885               emit_label (label1);
4886               expand_dec (adjusted_op0, const1_rtx);
4887               emit_jump_insn (targetm.gen_jump (label4));
4888               emit_barrier ();
4889               emit_label (label2);
4890               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4891                                compute_mode, label3);
4892               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4893                                   quotient, 0, OPTAB_LIB_WIDEN);
4894               if (tem != quotient)
4895                 emit_move_insn (quotient, tem);
4896               emit_jump_insn (targetm.gen_jump (label5));
4897               emit_barrier ();
4898               emit_label (label3);
4899               expand_inc (adjusted_op0, const1_rtx);
4900               emit_label (label4);
4901               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4902                                   quotient, 0, OPTAB_LIB_WIDEN);
4903               if (tem != quotient)
4904                 emit_move_insn (quotient, tem);
4905               expand_inc (quotient, const1_rtx);
4906               emit_label (label5);
4907             }
4908           }
4909         break;
4910
4911       case EXACT_DIV_EXPR:
4912         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4913           {
4914             HOST_WIDE_INT d = INTVAL (op1);
4915             unsigned HOST_WIDE_INT ml;
4916             int pre_shift;
4917             rtx t1;
4918
4919             pre_shift = floor_log2 (d & -d);
4920             ml = invert_mod2n (d >> pre_shift, size);
4921             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4922                                pre_shift, NULL_RTX, unsignedp);
4923             quotient = expand_mult (compute_mode, t1,
4924                                     gen_int_mode (ml, compute_mode),
4925                                     NULL_RTX, 1);
4926
4927             insn = get_last_insn ();
4928             set_dst_reg_note (insn, REG_EQUAL,
4929                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4930                                               compute_mode, op0, op1),
4931                               quotient);
4932           }
4933         break;
4934
4935       case ROUND_DIV_EXPR:
4936       case ROUND_MOD_EXPR:
4937         if (unsignedp)
4938           {
4939             rtx tem;
4940             rtx_code_label *label;
4941             label = gen_label_rtx ();
4942             quotient = gen_reg_rtx (compute_mode);
4943             remainder = gen_reg_rtx (compute_mode);
4944             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4945               {
4946                 rtx tem;
4947                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4948                                          quotient, 1, OPTAB_LIB_WIDEN);
4949                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4950                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4951                                           remainder, 1, OPTAB_LIB_WIDEN);
4952               }
4953             tem = plus_constant (compute_mode, op1, -1);
4954             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4955             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4956             expand_inc (quotient, const1_rtx);
4957             expand_dec (remainder, op1);
4958             emit_label (label);
4959           }
4960         else
4961           {
4962             rtx abs_rem, abs_op1, tem, mask;
4963             rtx_code_label *label;
4964             label = gen_label_rtx ();
4965             quotient = gen_reg_rtx (compute_mode);
4966             remainder = gen_reg_rtx (compute_mode);
4967             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4968               {
4969                 rtx tem;
4970                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4971                                          quotient, 0, OPTAB_LIB_WIDEN);
4972                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4973                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4974                                           remainder, 0, OPTAB_LIB_WIDEN);
4975               }
4976             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4977             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4978             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4979                                 1, NULL_RTX, 1);
4980             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4981             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4982                                 NULL_RTX, 0, OPTAB_WIDEN);
4983             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4984                                  size - 1, NULL_RTX, 0);
4985             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4986                                 NULL_RTX, 0, OPTAB_WIDEN);
4987             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4988                                 NULL_RTX, 0, OPTAB_WIDEN);
4989             expand_inc (quotient, tem);
4990             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4991                                 NULL_RTX, 0, OPTAB_WIDEN);
4992             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4993                                 NULL_RTX, 0, OPTAB_WIDEN);
4994             expand_dec (remainder, tem);
4995             emit_label (label);
4996           }
4997         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4998
4999       default:
5000         gcc_unreachable ();
5001       }
5002
5003   if (quotient == 0)
5004     {
5005       if (target && GET_MODE (target) != compute_mode)
5006         target = 0;
5007
5008       if (rem_flag)
5009         {
5010           /* Try to produce the remainder without producing the quotient.
5011              If we seem to have a divmod pattern that does not require widening,
5012              don't try widening here.  We should really have a WIDEN argument
5013              to expand_twoval_binop, since what we'd really like to do here is
5014              1) try a mod insn in compute_mode
5015              2) try a divmod insn in compute_mode
5016              3) try a div insn in compute_mode and multiply-subtract to get
5017                 remainder
5018              4) try the same things with widening allowed.  */
5019           remainder
5020             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5021                                  op0, op1, target,
5022                                  unsignedp,
5023                                  ((optab_handler (optab2, compute_mode)
5024                                    != CODE_FOR_nothing)
5025                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
5026           if (remainder == 0)
5027             {
5028               /* No luck there.  Can we do remainder and divide at once
5029                  without a library call?  */
5030               remainder = gen_reg_rtx (compute_mode);
5031               if (! expand_twoval_binop ((unsignedp
5032                                           ? udivmod_optab
5033                                           : sdivmod_optab),
5034                                          op0, op1,
5035                                          NULL_RTX, remainder, unsignedp))
5036                 remainder = 0;
5037             }
5038
5039           if (remainder)
5040             return gen_lowpart (mode, remainder);
5041         }
5042
5043       /* Produce the quotient.  Try a quotient insn, but not a library call.
5044          If we have a divmod in this mode, use it in preference to widening
5045          the div (for this test we assume it will not fail). Note that optab2
5046          is set to the one of the two optabs that the call below will use.  */
5047       quotient
5048         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5049                              op0, op1, rem_flag ? NULL_RTX : target,
5050                              unsignedp,
5051                              ((optab_handler (optab2, compute_mode)
5052                                != CODE_FOR_nothing)
5053                               ? OPTAB_DIRECT : OPTAB_WIDEN));
5054
5055       if (quotient == 0)
5056         {
5057           /* No luck there.  Try a quotient-and-remainder insn,
5058              keeping the quotient alone.  */
5059           quotient = gen_reg_rtx (compute_mode);
5060           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5061                                      op0, op1,
5062                                      quotient, NULL_RTX, unsignedp))
5063             {
5064               quotient = 0;
5065               if (! rem_flag)
5066                 /* Still no luck.  If we are not computing the remainder,
5067                    use a library call for the quotient.  */
5068                 quotient = sign_expand_binop (compute_mode,
5069                                               udiv_optab, sdiv_optab,
5070                                               op0, op1, target,
5071                                               unsignedp, OPTAB_LIB_WIDEN);
5072             }
5073         }
5074     }
5075
5076   if (rem_flag)
5077     {
5078       if (target && GET_MODE (target) != compute_mode)
5079         target = 0;
5080
5081       if (quotient == 0)
5082         {
5083           /* No divide instruction either.  Use library for remainder.  */
5084           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5085                                          op0, op1, target,
5086                                          unsignedp, OPTAB_LIB_WIDEN);
5087           /* No remainder function.  Try a quotient-and-remainder
5088              function, keeping the remainder.  */
5089           if (!remainder)
5090             {
5091               remainder = gen_reg_rtx (compute_mode);
5092               if (!expand_twoval_binop_libfunc
5093                   (unsignedp ? udivmod_optab : sdivmod_optab,
5094                    op0, op1,
5095                    NULL_RTX, remainder,
5096                    unsignedp ? UMOD : MOD))
5097                 remainder = NULL_RTX;
5098             }
5099         }
5100       else
5101         {
5102           /* We divided.  Now finish doing X - Y * (X / Y).  */
5103           remainder = expand_mult (compute_mode, quotient, op1,
5104                                    NULL_RTX, unsignedp);
5105           remainder = expand_binop (compute_mode, sub_optab, op0,
5106                                     remainder, target, unsignedp,
5107                                     OPTAB_LIB_WIDEN);
5108         }
5109     }
5110
5111   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5112 }
5113 \f
5114 /* Return a tree node with data type TYPE, describing the value of X.
5115    Usually this is a VAR_DECL, if there is no obvious better choice.
5116    X may be an expression; however, we only support those expressions
5117    generated by loop.c.  */
5118
5119 tree
5120 make_tree (tree type, rtx x)
5121 {
5122   tree t;
5123
5124   switch (GET_CODE (x))
5125     {
5126     case CONST_INT:
5127     case CONST_WIDE_INT:
5128       t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
5129       return t;
5130
5131     case CONST_DOUBLE:
5132       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5133       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5134         t = wide_int_to_tree (type,
5135                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5136                                                     HOST_BITS_PER_WIDE_INT * 2));
5137       else
5138         t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5139
5140       return t;
5141
5142     case CONST_VECTOR:
5143       {
5144         int units = CONST_VECTOR_NUNITS (x);
5145         tree itype = TREE_TYPE (type);
5146         tree *elts;
5147         int i;
5148
5149         /* Build a tree with vector elements.  */
5150         elts = XALLOCAVEC (tree, units);
5151         for (i = units - 1; i >= 0; --i)
5152           {
5153             rtx elt = CONST_VECTOR_ELT (x, i);
5154             elts[i] = make_tree (itype, elt);
5155           }
5156
5157         return build_vector (type, elts);
5158       }
5159
5160     case PLUS:
5161       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5162                           make_tree (type, XEXP (x, 1)));
5163
5164     case MINUS:
5165       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5166                           make_tree (type, XEXP (x, 1)));
5167
5168     case NEG:
5169       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5170
5171     case MULT:
5172       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5173                           make_tree (type, XEXP (x, 1)));
5174
5175     case ASHIFT:
5176       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5177                           make_tree (type, XEXP (x, 1)));
5178
5179     case LSHIFTRT:
5180       t = unsigned_type_for (type);
5181       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5182                                          make_tree (t, XEXP (x, 0)),
5183                                          make_tree (type, XEXP (x, 1))));
5184
5185     case ASHIFTRT:
5186       t = signed_type_for (type);
5187       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5188                                          make_tree (t, XEXP (x, 0)),
5189                                          make_tree (type, XEXP (x, 1))));
5190
5191     case DIV:
5192       if (TREE_CODE (type) != REAL_TYPE)
5193         t = signed_type_for (type);
5194       else
5195         t = type;
5196
5197       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5198                                          make_tree (t, XEXP (x, 0)),
5199                                          make_tree (t, XEXP (x, 1))));
5200     case UDIV:
5201       t = unsigned_type_for (type);
5202       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5203                                          make_tree (t, XEXP (x, 0)),
5204                                          make_tree (t, XEXP (x, 1))));
5205
5206     case SIGN_EXTEND:
5207     case ZERO_EXTEND:
5208       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5209                                           GET_CODE (x) == ZERO_EXTEND);
5210       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5211
5212     case CONST:
5213       return make_tree (type, XEXP (x, 0));
5214
5215     case SYMBOL_REF:
5216       t = SYMBOL_REF_DECL (x);
5217       if (t)
5218         return fold_convert (type, build_fold_addr_expr (t));
5219       /* else fall through.  */
5220
5221     default:
5222       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5223
5224       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5225          address mode to pointer mode.  */
5226       if (POINTER_TYPE_P (type))
5227         x = convert_memory_address_addr_space
5228               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5229
5230       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5231          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5232       t->decl_with_rtl.rtl = x;
5233
5234       return t;
5235     }
5236 }
5237 \f
5238 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5239    and returning TARGET.
5240
5241    If TARGET is 0, a pseudo-register or constant is returned.  */
5242
5243 rtx
5244 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5245 {
5246   rtx tem = 0;
5247
5248   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5249     tem = simplify_binary_operation (AND, mode, op0, op1);
5250   if (tem == 0)
5251     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5252
5253   if (target == 0)
5254     target = tem;
5255   else if (tem != target)
5256     emit_move_insn (target, tem);
5257   return target;
5258 }
5259
5260 /* Helper function for emit_store_flag.  */
5261 rtx
5262 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5263              machine_mode mode, machine_mode compare_mode,
5264              int unsignedp, rtx x, rtx y, int normalizep,
5265              machine_mode target_mode)
5266 {
5267   struct expand_operand ops[4];
5268   rtx op0, comparison, subtarget;
5269   rtx_insn *last;
5270   machine_mode result_mode = targetm.cstore_mode (icode);
5271
5272   last = get_last_insn ();
5273   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5274   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5275   if (!x || !y)
5276     {
5277       delete_insns_since (last);
5278       return NULL_RTX;
5279     }
5280
5281   if (target_mode == VOIDmode)
5282     target_mode = result_mode;
5283   if (!target)
5284     target = gen_reg_rtx (target_mode);
5285
5286   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5287
5288   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5289   create_fixed_operand (&ops[1], comparison);
5290   create_fixed_operand (&ops[2], x);
5291   create_fixed_operand (&ops[3], y);
5292   if (!maybe_expand_insn (icode, 4, ops))
5293     {
5294       delete_insns_since (last);
5295       return NULL_RTX;
5296     }
5297   subtarget = ops[0].value;
5298
5299   /* If we are converting to a wider mode, first convert to
5300      TARGET_MODE, then normalize.  This produces better combining
5301      opportunities on machines that have a SIGN_EXTRACT when we are
5302      testing a single bit.  This mostly benefits the 68k.
5303
5304      If STORE_FLAG_VALUE does not have the sign bit set when
5305      interpreted in MODE, we can do this conversion as unsigned, which
5306      is usually more efficient.  */
5307   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5308     {
5309       convert_move (target, subtarget,
5310                     val_signbit_known_clear_p (result_mode,
5311                                                STORE_FLAG_VALUE));
5312       op0 = target;
5313       result_mode = target_mode;
5314     }
5315   else
5316     op0 = subtarget;
5317
5318   /* If we want to keep subexpressions around, don't reuse our last
5319      target.  */
5320   if (optimize)
5321     subtarget = 0;
5322
5323   /* Now normalize to the proper value in MODE.  Sometimes we don't
5324      have to do anything.  */
5325   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5326     ;
5327   /* STORE_FLAG_VALUE might be the most negative number, so write
5328      the comparison this way to avoid a compiler-time warning.  */
5329   else if (- normalizep == STORE_FLAG_VALUE)
5330     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5331
5332   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5333      it hard to use a value of just the sign bit due to ANSI integer
5334      constant typing rules.  */
5335   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5336     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5337                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5338                         normalizep == 1);
5339   else
5340     {
5341       gcc_assert (STORE_FLAG_VALUE & 1);
5342
5343       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5344       if (normalizep == -1)
5345         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5346     }
5347
5348   /* If we were converting to a smaller mode, do the conversion now.  */
5349   if (target_mode != result_mode)
5350     {
5351       convert_move (target, op0, 0);
5352       return target;
5353     }
5354   else
5355     return op0;
5356 }
5357
5358
5359 /* A subroutine of emit_store_flag only including "tricks" that do not
5360    need a recursive call.  These are kept separate to avoid infinite
5361    loops.  */
5362
5363 static rtx
5364 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5365                    machine_mode mode, int unsignedp, int normalizep,
5366                    machine_mode target_mode)
5367 {
5368   rtx subtarget;
5369   enum insn_code icode;
5370   machine_mode compare_mode;
5371   enum mode_class mclass;
5372   enum rtx_code scode;
5373
5374   if (unsignedp)
5375     code = unsigned_condition (code);
5376   scode = swap_condition (code);
5377
5378   /* If one operand is constant, make it the second one.  Only do this
5379      if the other operand is not constant as well.  */
5380
5381   if (swap_commutative_operands_p (op0, op1))
5382     {
5383       std::swap (op0, op1);
5384       code = swap_condition (code);
5385     }
5386
5387   if (mode == VOIDmode)
5388     mode = GET_MODE (op0);
5389
5390   /* For some comparisons with 1 and -1, we can convert this to
5391      comparisons with zero.  This will often produce more opportunities for
5392      store-flag insns.  */
5393
5394   switch (code)
5395     {
5396     case LT:
5397       if (op1 == const1_rtx)
5398         op1 = const0_rtx, code = LE;
5399       break;
5400     case LE:
5401       if (op1 == constm1_rtx)
5402         op1 = const0_rtx, code = LT;
5403       break;
5404     case GE:
5405       if (op1 == const1_rtx)
5406         op1 = const0_rtx, code = GT;
5407       break;
5408     case GT:
5409       if (op1 == constm1_rtx)
5410         op1 = const0_rtx, code = GE;
5411       break;
5412     case GEU:
5413       if (op1 == const1_rtx)
5414         op1 = const0_rtx, code = NE;
5415       break;
5416     case LTU:
5417       if (op1 == const1_rtx)
5418         op1 = const0_rtx, code = EQ;
5419       break;
5420     default:
5421       break;
5422     }
5423
5424   /* If we are comparing a double-word integer with zero or -1, we can
5425      convert the comparison into one involving a single word.  */
5426   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5427       && GET_MODE_CLASS (mode) == MODE_INT
5428       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5429     {
5430       rtx tem;
5431       if ((code == EQ || code == NE)
5432           && (op1 == const0_rtx || op1 == constm1_rtx))
5433         {
5434           rtx op00, op01;
5435
5436           /* Do a logical OR or AND of the two words and compare the
5437              result.  */
5438           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5439           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5440           tem = expand_binop (word_mode,
5441                               op1 == const0_rtx ? ior_optab : and_optab,
5442                               op00, op01, NULL_RTX, unsignedp,
5443                               OPTAB_DIRECT);
5444
5445           if (tem != 0)
5446             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5447                                    unsignedp, normalizep);
5448         }
5449       else if ((code == LT || code == GE) && op1 == const0_rtx)
5450         {
5451           rtx op0h;
5452
5453           /* If testing the sign bit, can just test on high word.  */
5454           op0h = simplify_gen_subreg (word_mode, op0, mode,
5455                                       subreg_highpart_offset (word_mode,
5456                                                               mode));
5457           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5458                                  unsignedp, normalizep);
5459         }
5460       else
5461         tem = NULL_RTX;
5462
5463       if (tem)
5464         {
5465           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5466             return tem;
5467           if (!target)
5468             target = gen_reg_rtx (target_mode);
5469
5470           convert_move (target, tem,
5471                         !val_signbit_known_set_p (word_mode,
5472                                                   (normalizep ? normalizep
5473                                                    : STORE_FLAG_VALUE)));
5474           return target;
5475         }
5476     }
5477
5478   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5479      complement of A (for GE) and shifting the sign bit to the low bit.  */
5480   if (op1 == const0_rtx && (code == LT || code == GE)
5481       && GET_MODE_CLASS (mode) == MODE_INT
5482       && (normalizep || STORE_FLAG_VALUE == 1
5483           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5484     {
5485       subtarget = target;
5486
5487       if (!target)
5488         target_mode = mode;
5489
5490       /* If the result is to be wider than OP0, it is best to convert it
5491          first.  If it is to be narrower, it is *incorrect* to convert it
5492          first.  */
5493       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5494         {
5495           op0 = convert_modes (target_mode, mode, op0, 0);
5496           mode = target_mode;
5497         }
5498
5499       if (target_mode != mode)
5500         subtarget = 0;
5501
5502       if (code == GE)
5503         op0 = expand_unop (mode, one_cmpl_optab, op0,
5504                            ((STORE_FLAG_VALUE == 1 || normalizep)
5505                             ? 0 : subtarget), 0);
5506
5507       if (STORE_FLAG_VALUE == 1 || normalizep)
5508         /* If we are supposed to produce a 0/1 value, we want to do
5509            a logical shift from the sign bit to the low-order bit; for
5510            a -1/0 value, we do an arithmetic shift.  */
5511         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5512                             GET_MODE_BITSIZE (mode) - 1,
5513                             subtarget, normalizep != -1);
5514
5515       if (mode != target_mode)
5516         op0 = convert_modes (target_mode, mode, op0, 0);
5517
5518       return op0;
5519     }
5520
5521   mclass = GET_MODE_CLASS (mode);
5522   for (compare_mode = mode; compare_mode != VOIDmode;
5523        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5524     {
5525      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5526      icode = optab_handler (cstore_optab, optab_mode);
5527      if (icode != CODE_FOR_nothing)
5528         {
5529           do_pending_stack_adjust ();
5530           rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5531                                  unsignedp, op0, op1, normalizep, target_mode);
5532           if (tem)
5533             return tem;
5534
5535           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5536             {
5537               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5538                                  unsignedp, op1, op0, normalizep, target_mode);
5539               if (tem)
5540                 return tem;
5541             }
5542           break;
5543         }
5544     }
5545
5546   return 0;
5547 }
5548
/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
   and storing in TARGET.  Normally return TARGET.
   Return 0 if that cannot be done.

   TARGET may be 0; in that case no particular result mode is requested
   (TARGET_MODE below is VOIDmode).

   MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
   it is VOIDmode, they cannot both be CONST_INT.

   UNSIGNEDP is for the case where we have to widen the operands
   to perform the operation.  It says to use zero-extension.

   NORMALIZEP is 1 if we should convert the result to be either zero
   or one.  Normalize is -1 if we should convert the result to be
   either zero or -1.  If NORMALIZEP is zero, the result will be left
   "raw" out of the scc insn.

   The direct expansion in emit_store_flag_1 is tried first.  Only when
   that fails, and BRANCH_COST is nonzero, are the arithmetic fallbacks
   below attempted.  */

rtx
emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
                 machine_mode mode, int unsignedp, int normalizep)
{
  machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
  enum rtx_code rcode;
  rtx subtarget;
  rtx tem, trueval;
  rtx_insn *last;

  /* If we compare constants, we shouldn't use a store-flag operation,
     but a constant load.  We can get there via the vanilla route that
     usually generates a compare-branch sequence, but will in this case
     fold the comparison to a constant, and thus elide the branch.  */
  if (CONSTANT_P (op0) && CONSTANT_P (op1))
    return NULL_RTX;

  tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
                           target_mode);
  if (tem)
    return tem;

  /* If we reached here, we can't do this with a scc insn, however there
     are some comparisons that can be done in other ways.  Don't do any
     of these cases if branches are very cheap.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
    return 0;

  /* See what we need to return.  We can only return a 1, -1, or the
     sign bit.  */

  if (normalizep == 0)
    {
      if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
        normalizep = STORE_FLAG_VALUE;

      else if (val_signbit_p (mode, STORE_FLAG_VALUE))
        ;
      else
        return 0;
    }

  /* Remember where we are so that the insns of a failed attempt can be
     removed with delete_insns_since.  */
  last = get_last_insn ();

  /* If optimizing, use different pseudo registers for each insn, instead
     of reusing the same pseudo.  This leads to better CSE, but slows
     down the compiler, since there are more pseudos.  */
  subtarget = (!optimize
               && (target_mode == mode)) ? target : NULL_RTX;
  /* TRUEVAL is the value the result takes when the comparison holds.  */
  trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);

  /* For floating-point comparisons, try the reverse comparison or try
     changing the "orderedness" of the comparison.  */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      enum rtx_code first_code;
      bool and_them;

      rcode = reverse_condition_maybe_unordered (code);
      if (can_compare_p (rcode, mode, ccp_store_flag)
          && (code == ORDERED || code == UNORDERED
              || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
              || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
        {
          /* WANT_ADD is set when the raw flag and the requested result
             are 1 and -1 in either order.  The reversed comparison then
             yields STORE_FLAG_VALUE or 0, and adding NORMALIZEP turns
             that into 0 or NORMALIZEP respectively.  */
          int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
                          || (STORE_FLAG_VALUE == -1 && normalizep == 1));

          /* For the reverse comparison, use either an addition or a XOR.  */
          if (want_add
              && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
                           optimize_insn_for_speed_p ()) == 0)
            {
              tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
                                       STORE_FLAG_VALUE, target_mode);
              if (tem)
                return expand_binop (target_mode, add_optab, tem,
                                     gen_int_mode (normalizep, target_mode),
                                     target, 0, OPTAB_WIDEN);
            }
          else if (!want_add
                   && rtx_cost (trueval, mode, XOR, 1,
                                optimize_insn_for_speed_p ()) == 0)
            {
              /* The reversed comparison yields TRUEVAL or 0; XOR with
                 TRUEVAL exchanges the two.  */
              tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
                                       normalizep, target_mode);
              if (tem)
                return expand_binop (target_mode, xor_optab, tem, trueval,
                                     target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
            }
        }

      delete_insns_since (last);

      /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
      if (code == ORDERED || code == UNORDERED)
        return 0;

      /* Split CODE into FIRST_CODE and a second comparison, which is
         stored back into CODE.  AND_THEM tells how the two results are
         combined below.  */
      and_them = split_comparison (code, mode, &first_code, &code);

      /* If there are no NaNs, the first comparison should always fall through.
         Effectively change the comparison to the other one.  */
      if (!HONOR_NANS (mode))
        {
          gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
          return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
                                    target_mode);
        }

      if (!HAVE_conditional_move)
        return 0;

      /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
         conditional move.  */
      tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
                               normalizep, target_mode);
      if (tem == 0)
        return 0;

      /* When AND_THEM, the conditional move selects the first result if
         the second comparison holds and zero otherwise.  When not, it
         selects TRUEVAL if the second comparison holds and the first
         result otherwise.  */
      if (and_them)
        tem = emit_conditional_move (target, code, op0, op1, mode,
                                     tem, const0_rtx, GET_MODE (tem), 0);
      else
        tem = emit_conditional_move (target, code, op0, op1, mode,
                                     trueval, tem, GET_MODE (tem), 0);

      if (tem == 0)
        delete_insns_since (last);
      return tem;
    }

  /* The remaining tricks only apply to integer comparisons.  */

  if (GET_MODE_CLASS (mode) != MODE_INT)
    return 0;

  /* If this is an equality comparison of integers, we can try to exclusive-or
     (or subtract) the two operands and use a recursive call to try the
     comparison with zero.  Don't do any of these cases if branches are
     very cheap.  */

  if ((code == EQ || code == NE) && op1 != const0_rtx)
    {
      tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
                          OPTAB_WIDEN);

      /* Fall back to subtraction if the XOR could not be expanded.  */
      if (tem == 0)
        tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
                            OPTAB_WIDEN);
      if (tem != 0)
        tem = emit_store_flag (target, code, tem, const0_rtx,
                               mode, unsignedp, normalizep);
      if (tem != 0)
        return tem;

      delete_insns_since (last);
    }

  /* For integer comparisons, try the reverse comparison.  However, for
     small X and if we'd have anyway to extend, implementing "X != 0"
     as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
  rcode = reverse_condition (code);
  if (can_compare_p (rcode, mode, ccp_store_flag)
      && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
            && code == NE
            && GET_MODE_SIZE (mode) < UNITS_PER_WORD
            && op1 == const0_rtx))
    {
      /* Same meaning as in the floating-point case: set when the raw flag
         and the requested result are 1 and -1 in either order.  */
      int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
                      || (STORE_FLAG_VALUE == -1 && normalizep == 1));

      /* Again, for the reverse comparison, use either an addition or a XOR.  */
      if (want_add
          && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
                       optimize_insn_for_speed_p ()) == 0)
        {
          tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
                                   STORE_FLAG_VALUE, target_mode);
          if (tem != 0)
            tem = expand_binop (target_mode, add_optab, tem,
                                gen_int_mode (normalizep, target_mode),
                                target, 0, OPTAB_WIDEN);
        }
      else if (!want_add
               && rtx_cost (trueval, mode, XOR, 1,
                            optimize_insn_for_speed_p ()) == 0)
        {
          tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
                                   normalizep, target_mode);
          if (tem != 0)
            tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
                                INTVAL (trueval) >= 0, OPTAB_WIDEN);
        }

      if (tem != 0)
        return tem;
      delete_insns_since (last);
    }

  /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
     the constant zero.  Reject all other comparisons at this point.  Only
     do LE and GT if branches are expensive since they are expensive on
     2-operand machines.  */

  if (op1 != const0_rtx
      || (code != EQ && code != NE
          && (BRANCH_COST (optimize_insn_for_speed_p (),
                           false) <= 1 || (code != LE && code != GT))))
    return 0;

  /* Try to put the result of the comparison in the sign bit.  Assume we can't
     do the necessary operation below.  */

  tem = 0;

  /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
     the sign bit set.  */

  if (code == LE)
    {
      /* This is destructive, so SUBTARGET can't be OP0.  */
      if (rtx_equal_p (subtarget, op0))
        subtarget = 0;

      tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
                          OPTAB_WIDEN);
      if (tem)
        tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
                            OPTAB_WIDEN);
    }

  /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
     number of bits in the mode of OP0, minus one.  The arithmetic shift
     gives 0 for nonnegative A and -1 for negative A, so the difference
     has its sign bit set iff A > 0.  */

  if (code == GT)
    {
      /* OP0 is read again after the shift, so SUBTARGET can't be OP0.  */
      if (rtx_equal_p (subtarget, op0))
        subtarget = 0;

      tem = expand_shift (RSHIFT_EXPR, mode, op0,
                          GET_MODE_BITSIZE (mode) - 1,
                          subtarget, 0);
      tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
                          OPTAB_WIDEN);
    }

  if (code == EQ || code == NE)
    {
      /* For EQ or NE, one way to do the comparison is to apply an operation
         that converts the operand into a positive number if it is nonzero
         or zero if it was originally zero.  Then, for EQ, we subtract 1 and
         for NE we negate.  This puts the result in the sign bit.  Then we
         normalize with a shift, if needed.

         Two operations that can do the above actions are ABS and FFS, so try
         them.  If that doesn't work, and MODE is smaller than a full word,
         we can use zero-extension to the wider mode (an unsigned conversion)
         as the operation.  */

      /* Note that ABS doesn't yield a positive number for INT_MIN, but
         that is compensated by the subsequent overflow when subtracting
         one / negating.  */

      if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
        tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
      else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
        tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
      else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
        {
          /* From here on the computation is carried out in word_mode.  */
          tem = convert_modes (word_mode, mode, op0, 1);
          mode = word_mode;
        }

      if (tem != 0)
        {
          if (code == EQ)
            tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
                                0, OPTAB_WIDEN);
          else
            tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
        }

      /* If we couldn't do it that way, for NE we can "or" the two's complement
         of the value with itself.  For EQ, we take the one's complement of
         that "or", which is an extra insn, so we only handle EQ if branches
         are expensive.  */

      if (tem == 0
          && (code == NE
              || BRANCH_COST (optimize_insn_for_speed_p (),
                              false) > 1))
        {
          /* OP0 is used twice, so SUBTARGET can't be OP0.  */
          if (rtx_equal_p (subtarget, op0))
            subtarget = 0;

          tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
          tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
                              OPTAB_WIDEN);

          if (tem && code == EQ)
            tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
        }
    }

  /* The answer is now in the sign bit of TEM.  Shift it down to the low
     bit: a logical shift for a 0/1 result, an arithmetic shift for a
     0/-1 result.  */
  if (tem && normalizep)
    tem = expand_shift (RSHIFT_EXPR, mode, tem,
                        GET_MODE_BITSIZE (mode) - 1,
                        subtarget, normalizep == 1);

  if (tem)
    {
      /* Without a TARGET, return the value wherever it was computed.
         Otherwise convert it into TARGET if the modes differ, or copy it
         there if no SUBTARGET was in use.  */
      if (!target)
        ;
      else if (GET_MODE (tem) != target_mode)
        {
          convert_move (target, tem, 0);
          tem = target;
        }
      else if (!subtarget)
        {
          emit_move_insn (target, tem);
          tem = target;
        }
    }
  else
    delete_insns_since (last);

  return tem;
}
5892
5893 /* Like emit_store_flag, but always succeeds.  */
5894
5895 rtx
5896 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5897                        machine_mode mode, int unsignedp, int normalizep)
5898 {
5899   rtx tem;
5900   rtx_code_label *label;
5901   rtx trueval, falseval;
5902
5903   /* First see if emit_store_flag can do the job.  */
5904   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5905   if (tem != 0)
5906     return tem;
5907
5908   if (!target)
5909     target = gen_reg_rtx (word_mode);
5910
5911   /* If this failed, we have to do this with set/compare/jump/set code.
5912      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5913   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5914   if (code == NE
5915       && GET_MODE_CLASS (mode) == MODE_INT
5916       && REG_P (target)
5917       && op0 == target
5918       && op1 == const0_rtx)
5919     {
5920       label = gen_label_rtx ();
5921       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5922                                NULL_RTX, NULL, label, -1);
5923       emit_move_insn (target, trueval);
5924       emit_label (label);
5925       return target;
5926     }
5927
5928   if (!REG_P (target)
5929       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5930     target = gen_reg_rtx (GET_MODE (target));
5931
5932   /* Jump in the right direction if the target cannot implement CODE
5933      but can jump on its reverse condition.  */
5934   falseval = const0_rtx;
5935   if (! can_compare_p (code, mode, ccp_jump)
5936       && (! FLOAT_MODE_P (mode)
5937           || code == ORDERED || code == UNORDERED
5938           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5939           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5940     {
5941       enum rtx_code rcode;
5942       if (FLOAT_MODE_P (mode))
5943         rcode = reverse_condition_maybe_unordered (code);
5944       else
5945         rcode = reverse_condition (code);
5946
5947       /* Canonicalize to UNORDERED for the libcall.  */
5948       if (can_compare_p (rcode, mode, ccp_jump)
5949           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5950         {
5951           falseval = trueval;
5952           trueval = const0_rtx;
5953           code = rcode;
5954         }
5955     }
5956
5957   emit_move_insn (target, trueval);
5958   label = gen_label_rtx ();
5959   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5960                            label, -1);
5961
5962   emit_move_insn (target, falseval);
5963   emit_label (label);
5964
5965   return target;
5966 }
5967 \f
5968 /* Perform possibly multi-word comparison and conditional jump to LABEL
5969    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5970    now a thin wrapper around do_compare_rtx_and_jump.  */
5971
5972 static void
5973 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5974                  rtx_code_label *label)
5975 {
5976   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5977   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
5978                            NULL, label, -1);
5979 }