gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2016 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "backend.h"
  26 #include "target.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "predict.h"
  30 #include "tm_p.h"
  31 #include "expmed.h"
  32 #include "optabs.h"
  33 #include "emit-rtl.h"
  34 #include "diagnostic-core.h"
  35 #include "fold-const.h"
  36 #include "stor-layout.h"
  37 #include "dojump.h"
  38 #include "explow.h"
  39 #include "expr.h"
  40 #include "langhooks.h"
  41
  42 struct target_expmed default_target_expmed;
  43 #if SWITCHABLE_TARGET
  44 struct target_expmed *this_target_expmed = &default_target_expmed;
  45 #endif
  46
  47 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  48                                    unsigned HOST_WIDE_INT,
  49                                    unsigned HOST_WIDE_INT,
  50                                    unsigned HOST_WIDE_INT,
  51                                    rtx, bool);
  52 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
  53                                      unsigned HOST_WIDE_INT,
  54                                      rtx, bool);
  55 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  56                                    unsigned HOST_WIDE_INT,
  57                                    unsigned HOST_WIDE_INT,
  58                                    unsigned HOST_WIDE_INT,
  59                                    rtx, bool);
  60 static rtx extract_fixed_bit_field (machine_mode, rtx,
  61                                     unsigned HOST_WIDE_INT,
  62                                     unsigned HOST_WIDE_INT, rtx, int, bool);
  63 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
  64                                       unsigned HOST_WIDE_INT,
  65                                       unsigned HOST_WIDE_INT, rtx, int, bool);
  66 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
  67 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  68                                     unsigned HOST_WIDE_INT, int, bool);
  69 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
  70 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  71 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  72
  73 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  74    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  75    The mask is truncated if necessary to the width of mode MODE.  The
  76    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
  77
  78 static inline rtx
  79 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
  80 {
  81   return immed_wide_int_const
  82     (wi::shifted_mask (bitpos, bitsize, complement,
  83                        GET_MODE_PRECISION (mode)), mode);
  84 }
  85
  86 /* Test whether a value is zero of a power of two.  */
  87 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  88   (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
  89
  90 struct init_expmed_rtl
  91 {
  92   rtx reg;
  93   rtx plus;
  94   rtx neg;
  95   rtx mult;
  96   rtx sdiv;
  97   rtx udiv;
  98   rtx sdiv_32;
  99   rtx smod_32;
 100   rtx wide_mult;
 101   rtx wide_lshr;
 102   rtx wide_trunc;
 103   rtx shift;
 104   rtx shift_mult;
 105   rtx shift_add;
 106   rtx shift_sub0;
 107   rtx shift_sub1;
 108   rtx zext;
 109   rtx trunc;
 110
 111   rtx pow2[MAX_BITS_PER_WORD];
 112   rtx cint[MAX_BITS_PER_WORD];
 113 };
 114
 115 static void
 116 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
 117                       machine_mode from_mode, bool speed)
 118 {
 119   int to_size, from_size;
 120   rtx which;
 121
 122   to_size = GET_MODE_PRECISION (to_mode);
 123   from_size = GET_MODE_PRECISION (from_mode);
 124
 125   /* Most partial integers have a precision less than the "full"
 126      integer it requires for storage.  In case one doesn't, for
 127      comparison purposes here, reduce the bit size by one in that
 128      case.  */
 129   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 130       && exact_log2 (to_size) != -1)
 131     to_size --;
 132   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 133       && exact_log2 (from_size) != -1)
 134     from_size --;
 135
 136   /* Assume cost of zero-extend and sign-extend is the same.  */
 137   which = (to_size < from_size ? all->trunc : all->zext);
 138
 139   PUT_MODE (all->reg, from_mode);
 140   set_convert_cost (to_mode, from_mode, speed,
 141                     set_src_cost (which, to_mode, speed));
 142 }
 143
 144 static void
 145 init_expmed_one_mode (struct init_expmed_rtl *all,
 146                       machine_mode mode, int speed)
 147 {
 148   int m, n, mode_bitsize;
 149   machine_mode mode_from;
 150
 151   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 152
 153   PUT_MODE (all->reg, mode);
 154   PUT_MODE (all->plus, mode);
 155   PUT_MODE (all->neg, mode);
 156   PUT_MODE (all->mult, mode);
 157   PUT_MODE (all->sdiv, mode);
 158   PUT_MODE (all->udiv, mode);
 159   PUT_MODE (all->sdiv_32, mode);
 160   PUT_MODE (all->smod_32, mode);
 161   PUT_MODE (all->wide_trunc, mode);
 162   PUT_MODE (all->shift, mode);
 163   PUT_MODE (all->shift_mult, mode);
 164   PUT_MODE (all->shift_add, mode);
 165   PUT_MODE (all->shift_sub0, mode);
 166   PUT_MODE (all->shift_sub1, mode);
 167   PUT_MODE (all->zext, mode);
 168   PUT_MODE (all->trunc, mode);
 169
 170   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
 171   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
 172   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
 173   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
 174   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
 175
 176   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
 177                                      <= 2 * add_cost (speed, mode)));
 178   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
 179                                      <= 4 * add_cost (speed, mode)));
 180
 181   set_shift_cost (speed, mode, 0, 0);
 182   {
 183     int cost = add_cost (speed, mode);
 184     set_shiftadd_cost (speed, mode, 0, cost);
 185     set_shiftsub0_cost (speed, mode, 0, cost);
 186     set_shiftsub1_cost (speed, mode, 0, cost);
 187   }
 188
 189   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 190   for (m = 1; m < n; m++)
 191     {
 192       XEXP (all->shift, 1) = all->cint[m];
 193       XEXP (all->shift_mult, 1) = all->pow2[m];
 194
 195       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
 196       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
 197                                                        speed));
 198       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
 199                                                         speed));
 200       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
 201                                                         speed));
 202     }
 203
 204   if (SCALAR_INT_MODE_P (mode))
 205     {
 206       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 207            mode_from = (machine_mode)(mode_from + 1))
 208         init_expmed_one_conv (all, mode, mode_from, speed);
 209     }
 210   if (GET_MODE_CLASS (mode) == MODE_INT)
 211     {
 212       machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
 213       if (wider_mode != VOIDmode)
 214         {
 215           PUT_MODE (all->zext, wider_mode);
 216           PUT_MODE (all->wide_mult, wider_mode);
 217           PUT_MODE (all->wide_lshr, wider_mode);
 218           XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
 219
 220           set_mul_widen_cost (speed, wider_mode,
 221                               set_src_cost (all->wide_mult, wider_mode, speed));
 222           set_mul_highpart_cost (speed, mode,
 223                                  set_src_cost (all->wide_trunc, mode, speed));
 224         }
 225     }
 226 }
 227
 228 void
 229 init_expmed (void)
 230 {
 231   struct init_expmed_rtl all;
 232   machine_mode mode = QImode;
 233   int m, speed;
 234
 235   memset (&all, 0, sizeof all);
 236   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 237     {
 238       all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
 239       all.cint[m] = GEN_INT (m);
 240     }
 241
 242   /* Avoid using hard regs in ways which may be unsupported.  */
 243   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 244   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 245   all.neg = gen_rtx_NEG (mode, all.reg);
 246   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 247   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 248   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 249   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 250   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 251   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 252   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 253   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 254   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 255   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 256   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 257   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 258   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 259   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 260   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 261
 262   for (speed = 0; speed < 2; speed++)
 263     {
 264       crtl->maybe_hot_insn_p = speed;
 265       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
 266
 267       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 268            mode = (machine_mode)(mode + 1))
 269         init_expmed_one_mode (&all, mode, speed);
 270
 271       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 272         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 273              mode = (machine_mode)(mode + 1))
 274           init_expmed_one_mode (&all, mode, speed);
 275
 276       if (MIN_MODE_VECTOR_INT != VOIDmode)
 277         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 278              mode = (machine_mode)(mode + 1))
 279           init_expmed_one_mode (&all, mode, speed);
 280     }
 281
 282   if (alg_hash_used_p ())
 283     {
 284       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 285       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 286     }
 287   else
 288     set_alg_hash_used_p (true);
 289   default_rtl_profile ();
 290
 291   ggc_free (all.trunc);
 292   ggc_free (all.shift_sub1);
 293   ggc_free (all.shift_sub0);
 294   ggc_free (all.shift_add);
 295   ggc_free (all.shift_mult);
 296   ggc_free (all.shift);
 297   ggc_free (all.wide_trunc);
 298   ggc_free (all.wide_lshr);
 299   ggc_free (all.wide_mult);
 300   ggc_free (all.zext);
 301   ggc_free (all.smod_32);
 302   ggc_free (all.sdiv_32);
 303   ggc_free (all.udiv);
 304   ggc_free (all.sdiv);
 305   ggc_free (all.mult);
 306   ggc_free (all.neg);
 307   ggc_free (all.plus);
 308   ggc_free (all.reg);
 309 }
 310
 311 /* Return an rtx representing minus the value of X.
 312    MODE is the intended mode of the result,
 313    useful if X is a CONST_INT.  */
 314
 315 rtx
 316 negate_rtx (machine_mode mode, rtx x)
 317 {
 318   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 319
 320   if (result == 0)
 321     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 322
 323   return result;
 324 }
 325
 326 /* Whether reverse storage order is supported on the target.  */
 327 static int reverse_storage_order_supported = -1;
 328
 329 /* Check whether reverse storage order is supported on the target.  */
 330
 331 static void
 332 check_reverse_storage_order_support (void)
 333 {
 334   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 335     {
 336       reverse_storage_order_supported = 0;
 337       sorry ("reverse scalar storage order");
 338     }
 339   else
 340     reverse_storage_order_supported = 1;
 341 }
 342
 343 /* Whether reverse FP storage order is supported on the target.  */
 344 static int reverse_float_storage_order_supported = -1;
 345
 346 /* Check whether reverse FP storage order is supported on the target.  */
 347
 348 static void
 349 check_reverse_float_storage_order_support (void)
 350 {
 351   if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 352     {
 353       reverse_float_storage_order_supported = 0;
 354       sorry ("reverse floating-point scalar storage order");
 355     }
 356   else
 357     reverse_float_storage_order_supported = 1;
 358 }
 359
 360 /* Return an rtx representing value of X with reverse storage order.
 361    MODE is the intended mode of the result,
 362    useful if X is a CONST_INT.  */
 363
 364 rtx
 365 flip_storage_order (enum machine_mode mode, rtx x)
 366 {
 367   enum machine_mode int_mode;
 368   rtx result;
 369
 370   if (mode == QImode)
 371     return x;
 372
 373   if (COMPLEX_MODE_P (mode))
 374     {
 375       rtx real = read_complex_part (x, false);
 376       rtx imag = read_complex_part (x, true);
 377
 378       real = flip_storage_order (GET_MODE_INNER (mode), real);
 379       imag = flip_storage_order (GET_MODE_INNER (mode), imag);
 380
 381       return gen_rtx_CONCAT (mode, real, imag);
 382     }
 383
 384   if (__builtin_expect (reverse_storage_order_supported < 0, 0))
 385     check_reverse_storage_order_support ();
 386
 387   if (SCALAR_INT_MODE_P (mode))
 388     int_mode = mode;
 389   else
 390     {
 391       if (FLOAT_MODE_P (mode)
 392           && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
 393         check_reverse_float_storage_order_support ();
 394
 395       int_mode = mode_for_size (GET_MODE_PRECISION (mode), MODE_INT, 0);
 396       if (int_mode == BLKmode)
 397         {
 398           sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
 399           return x;
 400         }
 401       x = gen_lowpart (int_mode, x);
 402     }
 403
 404   result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
 405   if (result == 0)
 406     result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
 407
 408   if (int_mode != mode)
 409     result = gen_lowpart (mode, result);
 410
 411   return result;
 412 }
 413
 414 /* Adjust bitfield memory MEM so that it points to the first unit of mode
 415    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
 416    If MODE is BLKmode, return a reference to every byte in the bitfield.
 417    Set *NEW_BITNUM to the bit position of the field within the new memory.  */
 418
 419 static rtx
 420 narrow_bit_field_mem (rtx mem, machine_mode mode,
 421                       unsigned HOST_WIDE_INT bitsize,
 422                       unsigned HOST_WIDE_INT bitnum,
 423                       unsigned HOST_WIDE_INT *new_bitnum)
 424 {
 425   if (mode == BLKmode)
 426     {
 427       *new_bitnum = bitnum % BITS_PER_UNIT;
 428       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 429       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 430                             / BITS_PER_UNIT);
 431       return adjust_bitfield_address_size (mem, mode, offset, size);
 432     }
 433   else
 434     {
 435       unsigned int unit = GET_MODE_BITSIZE (mode);
 436       *new_bitnum = bitnum % unit;
 437       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 438       return adjust_bitfield_address (mem, mode, offset);
 439     }
 440 }
 441
 442 /* The caller wants to perform insertion or extraction PATTERN on a
 443    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 444    BITREGION_START and BITREGION_END are as for store_bit_field
 445    and FIELDMODE is the natural mode of the field.
 446
 447    Search for a mode that is compatible with the memory access
 448    restrictions and (where applicable) with a register insertion or
 449    extraction.  Return the new memory on success, storing the adjusted
 450    bit position in *NEW_BITNUM.  Return null otherwise.  */
 451
 452 static rtx
 453 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 454                               rtx op0, HOST_WIDE_INT bitsize,
 455                               HOST_WIDE_INT bitnum,
 456                               unsigned HOST_WIDE_INT bitregion_start,
 457                               unsigned HOST_WIDE_INT bitregion_end,
 458                               machine_mode fieldmode,
 459                               unsigned HOST_WIDE_INT *new_bitnum)
 460 {
 461   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 462                                 bitregion_end, MEM_ALIGN (op0),
 463                                 MEM_VOLATILE_P (op0));
 464   machine_mode best_mode;
 465   if (iter.next_mode (&best_mode))
 466     {
 467       /* We can use a memory in BEST_MODE.  See whether this is true for
 468          any wider modes.  All other things being equal, we prefer to
 469          use the widest mode possible because it tends to expose more
 470          CSE opportunities.  */
 471       if (!iter.prefer_smaller_modes ())
 472         {
 473           /* Limit the search to the mode required by the corresponding
 474              register insertion or extraction instruction, if any.  */
 475           machine_mode limit_mode = word_mode;
 476           extraction_insn insn;
 477           if (get_best_reg_extraction_insn (&insn, pattern,
 478                                             GET_MODE_BITSIZE (best_mode),
 479                                             fieldmode))
 480             limit_mode = insn.field_mode;
 481
 482           machine_mode wider_mode;
 483           while (iter.next_mode (&wider_mode)
 484                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 485             best_mode = wider_mode;
 486         }
 487       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 488                                    new_bitnum);
 489     }
 490   return NULL_RTX;
 491 }
 492
 493 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 494    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 495    offset is then BITNUM / BITS_PER_UNIT.  */
 496
 497 static bool
 498 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
 499                      unsigned HOST_WIDE_INT bitsize,
 500                      machine_mode struct_mode)
 501 {
 502   if (BYTES_BIG_ENDIAN)
 503     return (bitnum % BITS_PER_UNIT == 0
 504             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
 505                 || (bitnum + bitsize) % BITS_PER_WORD == 0));
 506   else
 507     return bitnum % BITS_PER_WORD == 0;
 508 }
 509
 510 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 511    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 512    Return false if the access would touch memory outside the range
 513    BITREGION_START to BITREGION_END for conformance to the C++ memory
 514    model.  */
 515
 516 static bool
 517 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 518                             unsigned HOST_WIDE_INT bitnum,
 519                             machine_mode fieldmode,
 520                             unsigned HOST_WIDE_INT bitregion_start,
 521                             unsigned HOST_WIDE_INT bitregion_end)
 522 {
 523   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 524
 525   /* -fstrict-volatile-bitfields must be enabled and we must have a
 526      volatile MEM.  */
 527   if (!MEM_P (op0)
 528       || !MEM_VOLATILE_P (op0)
 529       || flag_strict_volatile_bitfields <= 0)
 530     return false;
 531
 532   /* Non-integral modes likely only happen with packed structures.
 533      Punt.  */
 534   if (!SCALAR_INT_MODE_P (fieldmode))
 535     return false;
 536
 537   /* The bit size must not be larger than the field mode, and
 538      the field mode must not be larger than a word.  */
 539   if (bitsize > modesize || modesize > BITS_PER_WORD)
 540     return false;
 541
 542   /* Check for cases of unaligned fields that must be split.  */
 543   if (bitnum % modesize + bitsize > modesize)
 544     return false;
 545
 546   /* The memory must be sufficiently aligned for a MODESIZE access.
 547      This condition guarantees, that the memory access will not
 548      touch anything after the end of the structure.  */
 549   if (MEM_ALIGN (op0) < modesize)
 550     return false;
 551
 552   /* Check for cases where the C++ memory model applies.  */
 553   if (bitregion_end != 0
 554       && (bitnum - bitnum % modesize < bitregion_start
 555           || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
 556     return false;
 557
 558   return true;
 559 }
 560
 561 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 562    bit number BITNUM can be treated as a simple value of mode MODE.  */
 563
 564 static bool
 565 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 566                        unsigned HOST_WIDE_INT bitnum, machine_mode mode)
 567 {
 568   return (MEM_P (op0)
 569           && bitnum % BITS_PER_UNIT == 0
 570           && bitsize == GET_MODE_BITSIZE (mode)
 571           && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
 572               || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
 573                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 574 }
 575 \f
 576 /* Try to use instruction INSV to store VALUE into a field of OP0.
 577    BITSIZE and BITNUM are as for store_bit_field.  */
 578
 579 static bool
 580 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 581                             unsigned HOST_WIDE_INT bitsize,
 582                             unsigned HOST_WIDE_INT bitnum,
 583                             rtx value)
 584 {
 585   struct expand_operand ops[4];
 586   rtx value1;
 587   rtx xop0 = op0;
 588   rtx_insn *last = get_last_insn ();
 589   bool copy_back = false;
 590
 591   machine_mode op_mode = insv->field_mode;
 592   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 593   if (bitsize == 0 || bitsize > unit)
 594     return false;
 595
 596   if (MEM_P (xop0))
 597     /* Get a reference to the first byte of the field.  */
 598     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 599                                  &bitnum);
 600   else
 601     {
 602       /* Convert from counting within OP0 to counting in OP_MODE.  */
 603       if (BYTES_BIG_ENDIAN)
 604         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 605
 606       /* If xop0 is a register, we need it in OP_MODE
 607          to make it acceptable to the format of insv.  */
 608       if (GET_CODE (xop0) == SUBREG)
 609         /* We can't just change the mode, because this might clobber op0,
 610            and we will need the original value of op0 if insv fails.  */
 611         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 612       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 613         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 614     }
 615
 616   /* If the destination is a paradoxical subreg such that we need a
 617      truncate to the inner mode, perform the insertion on a temporary and
 618      truncate the result to the original destination.  Note that we can't
 619      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 620      X) 0)) is (reg:N X).  */
 621   if (GET_CODE (xop0) == SUBREG
 622       && REG_P (SUBREG_REG (xop0))
 623       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 624                                          op_mode))
 625     {
 626       rtx tem = gen_reg_rtx (op_mode);
 627       emit_move_insn (tem, xop0);
 628       xop0 = tem;
 629       copy_back = true;
 630     }
 631
 632   /* There are similar overflow check at the start of store_bit_field_1,
 633      but that only check the situation where the field lies completely
 634      outside the register, while there do have situation where the field
 635      lies partialy in the register, we need to adjust bitsize for this
 636      partial overflow situation.  Without this fix, pr48335-2.c on big-endian
 637      will broken on those arch support bit insert instruction, like arm, aarch64
 638      etc.  */
 639   if (bitsize + bitnum > unit && bitnum < unit)
 640     {
 641       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
 642                "destination object, data truncated into %wu-bit",
 643                bitsize, unit - bitnum);
 644       bitsize = unit - bitnum;
 645     }
 646
 647   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 648      "backwards" from the size of the unit we are inserting into.
 649      Otherwise, we count bits from the most significant on a
 650      BYTES/BITS_BIG_ENDIAN machine.  */
 651
 652   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 653     bitnum = unit - bitsize - bitnum;
 654
 655   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 656   value1 = value;
 657   if (GET_MODE (value) != op_mode)
 658     {
 659       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 660         {
 661           rtx tmp;
 662           /* Optimization: Don't bother really extending VALUE
 663              if it has all the bits we will actually use.  However,
 664              if we must narrow it, be sure we do it correctly.  */
 665
 666           if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 667             {
 668               tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 669               if (! tmp)
 670                 tmp = simplify_gen_subreg (op_mode,
 671                                            force_reg (GET_MODE (value),
 672                                                       value1),
 673                                            GET_MODE (value), 0);
 674             }
 675           else
 676             {
 677               tmp = gen_lowpart_if_possible (op_mode, value1);
 678               if (! tmp)
 679                 tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value),
 680                                                        value1));
 681             }
 682           value1 = tmp;
 683         }
 684       else if (CONST_INT_P (value))
 685         value1 = gen_int_mode (INTVAL (value), op_mode);
 686       else
 687         /* Parse phase is supposed to make VALUE's data type
 688            match that of the component reference, which is a type
 689            at least as wide as the field; so VALUE should have
 690            a mode that corresponds to that type.  */
 691         gcc_assert (CONSTANT_P (value));
 692     }
 693
 694   create_fixed_operand (&ops[0], xop0);
 695   create_integer_operand (&ops[1], bitsize);
 696   create_integer_operand (&ops[2], bitnum);
 697   create_input_operand (&ops[3], value1, op_mode);
 698   if (maybe_expand_insn (insv->icode, 4, ops))
 699     {
 700       if (copy_back)
 701         convert_move (op0, xop0, true);
 702       return true;
 703     }
 704   delete_insns_since (last);
 705   return false;
 706 }
 707
 708 /* A subroutine of store_bit_field, with the same arguments.  Return true
 709    if the operation could be implemented.
 710
 711    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 712    no other way of implementing the operation.  If FALLBACK_P is false,
 713    return false instead.  */
 714
 715 static bool
 716 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 717                    unsigned HOST_WIDE_INT bitnum,
 718                    unsigned HOST_WIDE_INT bitregion_start,
 719                    unsigned HOST_WIDE_INT bitregion_end,
 720                    machine_mode fieldmode,
 721                    rtx value, bool reverse, bool fallback_p)
 722 {
 723   rtx op0 = str_rtx;
 724   rtx orig_value;
 725
 726   while (GET_CODE (op0) == SUBREG)
 727     {
 728       /* The following line once was done only if WORDS_BIG_ENDIAN,
 729          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 730          meaningful at a much higher level; when structures are copied
 731          between memory and regs, the higher-numbered regs
 732          always get higher addresses.  */
 733       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 734       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 735       int byte_offset = 0;
 736
 737       /* Paradoxical subregs need special handling on big-endian machines.  */
 738       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 739         {
 740           int difference = inner_mode_size - outer_mode_size;
 741
 742           if (WORDS_BIG_ENDIAN)
 743             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 744           if (BYTES_BIG_ENDIAN)
 745             byte_offset += difference % UNITS_PER_WORD;
 746         }
 747       else
 748         byte_offset = SUBREG_BYTE (op0);
 749
 750       bitnum += byte_offset * BITS_PER_UNIT;
 751       op0 = SUBREG_REG (op0);
 752     }
 753
 754   /* No action is needed if the target is a register and if the field
 755      lies completely outside that register.  This can occur if the source
 756      code contains an out-of-bounds access to a small array.  */
 757   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 758     return true;
 759
 760   /* Use vec_set patterns for inserting parts of vectors whenever
 761      available.  */
 762   if (VECTOR_MODE_P (GET_MODE (op0))
 763       && !MEM_P (op0)
 764       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 765       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 766       && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
 767       && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
 768     {
 769       struct expand_operand ops[3];
 770       machine_mode outermode = GET_MODE (op0);
 771       machine_mode innermode = GET_MODE_INNER (outermode);
 772       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 773       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 774
 775       create_fixed_operand (&ops[0], op0);
 776       create_input_operand (&ops[1], value, innermode);
 777       create_integer_operand (&ops[2], pos);
 778       if (maybe_expand_insn (icode, 3, ops))
 779         return true;
 780     }
 781
 782   /* If the target is a register, overwriting the entire object, or storing
 783      a full-word or multi-word field can be done with just a SUBREG.  */
 784   if (!MEM_P (op0)
 785       && bitsize == GET_MODE_BITSIZE (fieldmode)
 786       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 787           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 788     {
 789       /* Use the subreg machinery either to narrow OP0 to the required
 790          words or to cope with mode punning between equal-sized modes.
 791          In the latter case, use subreg on the rhs side, not lhs.  */
 792       rtx sub;
 793
 794       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 795         {
 796           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 797           if (sub)
 798             {
 799               if (reverse)
 800                 sub = flip_storage_order (GET_MODE (op0), sub);
 801               emit_move_insn (op0, sub);
 802               return true;
 803             }
 804         }
 805       else
 806         {
 807           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 808                                      bitnum / BITS_PER_UNIT);
 809           if (sub)
 810             {
 811               if (reverse)
 812                 value = flip_storage_order (fieldmode, value);
 813               emit_move_insn (sub, value);
 814               return true;
 815             }
 816         }
 817     }
 818
 819   /* If the target is memory, storing any naturally aligned field can be
 820      done with a simple store.  For targets that support fast unaligned
 821      memory, any naturally sized, unit aligned field can be done directly.  */
 822   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
 823     {
 824       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 825       if (reverse)
 826         value = flip_storage_order (fieldmode, value);
 827       emit_move_insn (op0, value);
 828       return true;
 829     }
 830
 831   /* Make sure we are playing with integral modes.  Pun with subregs
 832      if we aren't.  This must come after the entire register case above,
 833      since that case is valid for any mode.  The following cases are only
 834      valid for integral modes.  */
 835   {
 836     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 837     if (imode != GET_MODE (op0))
 838       {
 839         if (MEM_P (op0))
 840           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
 841         else
 842           {
 843             gcc_assert (imode != BLKmode);
 844             op0 = gen_lowpart (imode, op0);
 845           }
 846       }
 847   }
 848
 849   /* Storing an lsb-aligned field in a register
 850      can be done with a movstrict instruction.  */
 851
 852   if (!MEM_P (op0)
 853       && !reverse
 854       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 855       && bitsize == GET_MODE_BITSIZE (fieldmode)
 856       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 857     {
 858       struct expand_operand ops[2];
 859       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 860       rtx arg0 = op0;
 861       unsigned HOST_WIDE_INT subreg_off;
 862
 863       if (GET_CODE (arg0) == SUBREG)
 864         {
 865           /* Else we've got some float mode source being extracted into
 866              a different float mode destination -- this combination of
 867              subregs results in Severe Tire Damage.  */
 868           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 869                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 870                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 871           arg0 = SUBREG_REG (arg0);
 872         }
 873
 874       subreg_off = bitnum / BITS_PER_UNIT;
 875       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 876         {
 877           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 878
 879           create_fixed_operand (&ops[0], arg0);
 880           /* Shrink the source operand to FIELDMODE.  */
 881           create_convert_operand_to (&ops[1], value, fieldmode, false);
 882           if (maybe_expand_insn (icode, 2, ops))
 883             return true;
 884         }
 885     }
 886
 887   /* Handle fields bigger than a word.  */
 888
 889   if (bitsize > BITS_PER_WORD)
 890     {
 891       /* Here we transfer the words of the field
 892          in the order least significant first.
 893          This is because the most significant word is the one which may
 894          be less than full.
 895          However, only do that if the value is not BLKmode.  */
 896
 897       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 898       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 899       unsigned int i;
 900       rtx_insn *last;
 901
 902       /* This is the mode we must force value to, so that there will be enough
 903          subwords to extract.  Note that fieldmode will often (always?) be
 904          VOIDmode, because that is what store_field uses to indicate that this
 905          is a bit field, but passing VOIDmode to operand_subword_force
 906          is not allowed.  */
 907       fieldmode = GET_MODE (value);
 908       if (fieldmode == VOIDmode)
 909         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 910
 911       last = get_last_insn ();
 912       for (i = 0; i < nwords; i++)
 913         {
 914           /* If I is 0, use the low-order word in both field and target;
 915              if I is 1, use the next to lowest word; and so on.  */
 916           unsigned int wordnum = (backwards
 917                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 918                                   - i - 1
 919                                   : i);
 920           unsigned int bit_offset = (backwards ^ reverse
 921                                      ? MAX ((int) bitsize - ((int) i + 1)
 922                                             * BITS_PER_WORD,
 923                                             0)
 924                                      : (int) i * BITS_PER_WORD);
 925           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 926           unsigned HOST_WIDE_INT new_bitsize =
 927             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 928
 929           /* If the remaining chunk doesn't have full wordsize we have
 930              to make sure that for big-endian machines the higher order
 931              bits are used.  */
 932           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 933             value_word = simplify_expand_binop (word_mode, lshr_optab,
 934                                                 value_word,
 935                                                 GEN_INT (BITS_PER_WORD
 936                                                          - new_bitsize),
 937                                                 NULL_RTX, true,
 938                                                 OPTAB_LIB_WIDEN);
 939
 940           if (!store_bit_field_1 (op0, new_bitsize,
 941                                   bitnum + bit_offset,
 942                                   bitregion_start, bitregion_end,
 943                                   word_mode,
 944                                   value_word, reverse, fallback_p))
 945             {
 946               delete_insns_since (last);
 947               return false;
 948             }
 949         }
 950       return true;
 951     }
 952
 953   /* If VALUE has a floating-point or complex mode, access it as an
 954      integer of the corresponding size.  This can occur on a machine
 955      with 64 bit registers that uses SFmode for float.  It can also
 956      occur for unaligned float or complex fields.  */
 957   orig_value = value;
 958   if (GET_MODE (value) != VOIDmode
 959       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 960       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 961     {
 962       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 963       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 964     }
 965
 966   /* If OP0 is a multi-word register, narrow it to the affected word.
 967      If the region spans two words, defer to store_split_bit_field.  */
 968   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 969     {
 970       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
 971         {
 972           if (!fallback_p)
 973             return false;
 974
 975           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 976                                  bitregion_end, value, reverse);
 977           return true;
 978         }
 979       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 980                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 981       gcc_assert (op0);
 982       bitnum %= BITS_PER_WORD;
 983     }
 984
 985   /* From here on we can assume that the field to be stored in fits
 986      within a word.  If the destination is a register, it too fits
 987      in a word.  */
 988
 989   extraction_insn insv;
 990   if (!MEM_P (op0)
 991       && !reverse
 992       && get_best_reg_extraction_insn (&insv, EP_insv,
 993                                        GET_MODE_BITSIZE (GET_MODE (op0)),
 994                                        fieldmode)
 995       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 996     return true;
 997
 998   /* If OP0 is a memory, try copying it to a register and seeing if a
 999      cheap register alternative is available.  */
1000   if (MEM_P (op0) && !reverse)
1001     {
1002       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1003                                         fieldmode)
1004           && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1005         return true;
1006
1007       rtx_insn *last = get_last_insn ();
1008
1009       /* Try loading part of OP0 into a register, inserting the bitfield
1010          into that, and then copying the result back to OP0.  */
1011       unsigned HOST_WIDE_INT bitpos;
1012       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1013                                                bitregion_start, bitregion_end,
1014                                                fieldmode, &bitpos);
1015       if (xop0)
1016         {
1017           rtx tempreg = copy_to_reg (xop0);
1018           if (store_bit_field_1 (tempreg, bitsize, bitpos,
1019                                  bitregion_start, bitregion_end,
1020                                  fieldmode, orig_value, reverse, false))
1021             {
1022               emit_move_insn (xop0, tempreg);
1023               return true;
1024             }
1025           delete_insns_since (last);
1026         }
1027     }
1028
1029   if (!fallback_p)
1030     return false;
1031
1032   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
1033                          bitregion_end, value, reverse);
1034   return true;
1035 }
1036
1037 /* Generate code to store value from rtx VALUE
1038    into a bit-field within structure STR_RTX
1039    containing BITSIZE bits starting at bit BITNUM.
1040
1041    BITREGION_START is bitpos of the first bitfield in this region.
1042    BITREGION_END is the bitpos of the ending bitfield in this region.
1043    These two fields are 0, if the C++ memory model does not apply,
1044    or we are not interested in keeping track of bitfield regions.
1045
1046    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1047
1048    If REVERSE is true, the store is to be done in reverse order.  */
1049
1050 void
1051 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1052                  unsigned HOST_WIDE_INT bitnum,
1053                  unsigned HOST_WIDE_INT bitregion_start,
1054                  unsigned HOST_WIDE_INT bitregion_end,
1055                  machine_mode fieldmode,
1056                  rtx value, bool reverse)
1057 {
1058   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1059   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
1060                                   bitregion_start, bitregion_end))
1061     {
1062       /* Storing of a full word can be done with a simple store.
1063          We know here that the field can be accessed with one single
1064          instruction.  For targets that support unaligned memory,
1065          an unaligned access may be necessary.  */
1066       if (bitsize == GET_MODE_BITSIZE (fieldmode))
1067         {
1068           str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
1069                                              bitnum / BITS_PER_UNIT);
1070           if (reverse)
1071             value = flip_storage_order (fieldmode, value);
1072           gcc_assert (bitnum % BITS_PER_UNIT == 0);
1073           emit_move_insn (str_rtx, value);
1074         }
1075       else
1076         {
1077           rtx temp;
1078
1079           str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
1080                                           &bitnum);
1081           gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
1082           temp = copy_to_reg (str_rtx);
1083           if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1084                                   fieldmode, value, reverse, true))
1085             gcc_unreachable ();
1086
1087           emit_move_insn (str_rtx, temp);
1088         }
1089
1090       return;
1091     }
1092
1093   /* Under the C++0x memory model, we must not touch bits outside the
1094      bit region.  Adjust the address to start at the beginning of the
1095      bit region.  */
1096   if (MEM_P (str_rtx) && bitregion_start > 0)
1097     {
1098       machine_mode bestmode;
1099       HOST_WIDE_INT offset, size;
1100
1101       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1102
1103       offset = bitregion_start / BITS_PER_UNIT;
1104       bitnum -= bitregion_start;
1105       size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1106       bitregion_end -= bitregion_start;
1107       bitregion_start = 0;
1108       bestmode = get_best_mode (bitsize, bitnum,
1109                                 bitregion_start, bitregion_end,
1110                                 MEM_ALIGN (str_rtx), VOIDmode,
1111                                 MEM_VOLATILE_P (str_rtx));
1112       str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1113     }
1114
1115   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1116                           bitregion_start, bitregion_end,
1117                           fieldmode, value, reverse, true))
1118     gcc_unreachable ();
1119 }
1120 \f
1121 /* Use shifts and boolean operations to store VALUE into a bit field of
1122    width BITSIZE in OP0, starting at bit BITNUM.
1123
1124    If REVERSE is true, the store is to be done in reverse order.  */
1125
1126 static void
1127 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1128                        unsigned HOST_WIDE_INT bitnum,
1129                        unsigned HOST_WIDE_INT bitregion_start,
1130                        unsigned HOST_WIDE_INT bitregion_end,
1131                        rtx value, bool reverse)
1132 {
1133   /* There is a case not handled here:
1134      a structure with a known alignment of just a halfword
1135      and a field split across two aligned halfwords within the structure.
1136      Or likewise a structure with a known alignment of just a byte
1137      and a field split across two bytes.
1138      Such cases are not supposed to be able to occur.  */
1139
1140   if (MEM_P (op0))
1141     {
1142       machine_mode mode = GET_MODE (op0);
1143       if (GET_MODE_BITSIZE (mode) == 0
1144           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1145         mode = word_mode;
1146       mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1147                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1148
1149       if (mode == VOIDmode)
1150         {
1151           /* The only way this should occur is if the field spans word
1152              boundaries.  */
1153           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1154                                  bitregion_end, value, reverse);
1155           return;
1156         }
1157
1158       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1159     }
1160
1161   store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
1162 }
1163
1164 /* Helper function for store_fixed_bit_field, stores
1165    the bit field always using the MODE of OP0.  */
1166
1167 static void
1168 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1169                          unsigned HOST_WIDE_INT bitnum,
1170                          rtx value, bool reverse)
1171 {
1172   machine_mode mode;
1173   rtx temp;
1174   int all_zero = 0;
1175   int all_one = 0;
1176
1177   mode = GET_MODE (op0);
1178   gcc_assert (SCALAR_INT_MODE_P (mode));
1179
1180   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1181      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1182
1183   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1184     /* BITNUM is the distance between our msb
1185        and that of the containing datum.
1186        Convert it to the distance from the lsb.  */
1187     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1188
1189   /* Now BITNUM is always the distance between our lsb
1190      and that of OP0.  */
1191
1192   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1193      we must first convert its mode to MODE.  */
1194
1195   if (CONST_INT_P (value))
1196     {
1197       unsigned HOST_WIDE_INT v = UINTVAL (value);
1198
1199       if (bitsize < HOST_BITS_PER_WIDE_INT)
1200         v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1201
1202       if (v == 0)
1203         all_zero = 1;
1204       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1205                 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1206                || (bitsize == HOST_BITS_PER_WIDE_INT
1207                    && v == HOST_WIDE_INT_M1U))
1208         all_one = 1;
1209
1210       value = lshift_value (mode, v, bitnum);
1211     }
1212   else
1213     {
1214       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1215                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1216
1217       if (GET_MODE (value) != mode)
1218         value = convert_to_mode (mode, value, 1);
1219
1220       if (must_and)
1221         value = expand_binop (mode, and_optab, value,
1222                               mask_rtx (mode, 0, bitsize, 0),
1223                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1224       if (bitnum > 0)
1225         value = expand_shift (LSHIFT_EXPR, mode, value,
1226                               bitnum, NULL_RTX, 1);
1227     }
1228
1229   if (reverse)
1230     value = flip_storage_order (mode, value);
1231
1232   /* Now clear the chosen bits in OP0,
1233      except that if VALUE is -1 we need not bother.  */
1234   /* We keep the intermediates in registers to allow CSE to combine
1235      consecutive bitfield assignments.  */
1236
1237   temp = force_reg (mode, op0);
1238
1239   if (! all_one)
1240     {
1241       rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1242       if (reverse)
1243         mask = flip_storage_order (mode, mask);
1244       temp = expand_binop (mode, and_optab, temp, mask,
1245                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1246       temp = force_reg (mode, temp);
1247     }
1248
1249   /* Now logical-or VALUE into OP0, unless it is zero.  */
1250
1251   if (! all_zero)
1252     {
1253       temp = expand_binop (mode, ior_optab, temp, value,
1254                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1255       temp = force_reg (mode, temp);
1256     }
1257
1258   if (op0 != temp)
1259     {
1260       op0 = copy_rtx (op0);
1261       emit_move_insn (op0, temp);
1262     }
1263 }
1264 \f
1265 /* Store a bit field that is split across multiple accessible memory objects.
1266
1267    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1268    BITSIZE is the field width; BITPOS the position of its first bit
1269    (within the word).
1270    VALUE is the value to store.
1271
1272    If REVERSE is true, the store is to be done in reverse order.
1273
1274    This does not yet handle fields wider than BITS_PER_WORD.  */
1275
1276 static void
1277 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1278                        unsigned HOST_WIDE_INT bitpos,
1279                        unsigned HOST_WIDE_INT bitregion_start,
1280                        unsigned HOST_WIDE_INT bitregion_end,
1281                        rtx value, bool reverse)
1282 {
1283   unsigned int unit, total_bits, bitsdone = 0;
1284
1285   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1286      much at a time.  */
1287   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1288     unit = BITS_PER_WORD;
1289   else
1290     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1291
1292   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1293      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1294      again, and we will mutually recurse forever.  */
1295   if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1296     unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1297
1298   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1299      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1300      that VALUE might be a floating-point constant.  */
1301   if (CONSTANT_P (value) && !CONST_INT_P (value))
1302     {
1303       rtx word = gen_lowpart_common (word_mode, value);
1304
1305       if (word && (value != word))
1306         value = word;
1307       else
1308         value = gen_lowpart_common (word_mode,
1309                                     force_reg (GET_MODE (value) != VOIDmode
1310                                                ? GET_MODE (value)
1311                                                : word_mode, value));
1312     }
1313
1314   total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1315
1316   while (bitsdone < bitsize)
1317     {
1318       unsigned HOST_WIDE_INT thissize;
1319       unsigned HOST_WIDE_INT thispos;
1320       unsigned HOST_WIDE_INT offset;
1321       rtx part, word;
1322
1323       offset = (bitpos + bitsdone) / unit;
1324       thispos = (bitpos + bitsdone) % unit;
1325
1326       /* When region of bytes we can touch is restricted, decrease
1327          UNIT close to the end of the region as needed.  If op0 is a REG
1328          or SUBREG of REG, don't do this, as there can't be data races
1329          on a register and we can expand shorter code in some cases.  */
1330       if (bitregion_end
1331           && unit > BITS_PER_UNIT
1332           && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1333           && !REG_P (op0)
1334           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1335         {
1336           unit = unit / 2;
1337           continue;
1338         }
1339
1340       /* THISSIZE must not overrun a word boundary.  Otherwise,
1341          store_fixed_bit_field will call us again, and we will mutually
1342          recurse forever.  */
1343       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1344       thissize = MIN (thissize, unit - thispos);
1345
1346       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1347         {
1348           /* Fetch successively less significant portions.  */
1349           if (CONST_INT_P (value))
1350             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1351                              >> (bitsize - bitsdone - thissize))
1352                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1353           /* Likewise, but the source is little-endian.  */
1354           else if (reverse)
1355             part = extract_fixed_bit_field (word_mode, value, thissize,
1356                                             bitsize - bitsdone - thissize,
1357                                             NULL_RTX, 1, false);
1358           else
1359             {
1360               int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1361               /* The args are chosen so that the last part includes the
1362                  lsb.  Give extract_bit_field the value it needs (with
1363                  endianness compensation) to fetch the piece we want.  */
1364               part = extract_fixed_bit_field (word_mode, value, thissize,
1365                                               total_bits - bitsize + bitsdone,
1366                                               NULL_RTX, 1, false);
1367             }
1368         }
1369       else
1370         {
1371           /* Fetch successively more significant portions.  */
1372           if (CONST_INT_P (value))
1373             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1374                              >> bitsdone)
1375                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1376           /* Likewise, but the source is big-endian.  */
1377           else if (reverse)
1378             part = extract_fixed_bit_field (word_mode, value, thissize,
1379                                             total_bits - bitsdone - thissize,
1380                                             NULL_RTX, 1, false);
1381           else
1382             part = extract_fixed_bit_field (word_mode, value, thissize,
1383                                             bitsdone, NULL_RTX, 1, false);
1384         }
1385
1386       /* If OP0 is a register, then handle OFFSET here.  */
1387       if (SUBREG_P (op0) || REG_P (op0))
1388         {
1389           machine_mode op0_mode = GET_MODE (op0);
1390           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1391             word = offset ? const0_rtx : op0;
1392           else
1393             word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1394                                           GET_MODE (op0));
1395           offset &= BITS_PER_WORD / unit - 1;
1396         }
1397       else
1398         word = op0;
1399
1400       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1401          it is just an out-of-bounds access.  Ignore it.  */
1402       if (word != const0_rtx)
1403         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1404                                bitregion_start, bitregion_end, part,
1405                                reverse);
1406       bitsdone += thissize;
1407     }
1408 }
1409 \f
1410 /* A subroutine of extract_bit_field_1 that converts return value X
1411    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1412    to extract_bit_field.  */
1413
1414 static rtx
1415 convert_extracted_bit_field (rtx x, machine_mode mode,
1416                              machine_mode tmode, bool unsignedp)
1417 {
1418   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1419     return x;
1420
1421   /* If the x mode is not a scalar integral, first convert to the
1422      integer mode of that size and then access it as a floating-point
1423      value via a SUBREG.  */
1424   if (!SCALAR_INT_MODE_P (tmode))
1425     {
1426       machine_mode smode;
1427
1428       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1429       x = convert_to_mode (smode, x, unsignedp);
1430       x = force_reg (smode, x);
1431       return gen_lowpart (tmode, x);
1432     }
1433
1434   return convert_to_mode (tmode, x, unsignedp);
1435 }
1436
1437 /* Try to use an ext(z)v pattern to extract a field from OP0.
1438    Return the extracted value on success, otherwise return null.
1439    EXT_MODE is the mode of the extraction and the other arguments
1440    are as for extract_bit_field.  */
1441
1442 static rtx
1443 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1444                               unsigned HOST_WIDE_INT bitsize,
1445                               unsigned HOST_WIDE_INT bitnum,
1446                               int unsignedp, rtx target,
1447                               machine_mode mode, machine_mode tmode)
1448 {
1449   struct expand_operand ops[4];
1450   rtx spec_target = target;
1451   rtx spec_target_subreg = 0;
1452   machine_mode ext_mode = extv->field_mode;
1453   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1454
1455   if (bitsize == 0 || unit < bitsize)
1456     return NULL_RTX;
1457
1458   if (MEM_P (op0))
1459     /* Get a reference to the first byte of the field.  */
1460     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1461                                 &bitnum);
1462   else
1463     {
1464       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1465       if (BYTES_BIG_ENDIAN)
1466         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1467
1468       /* If op0 is a register, we need it in EXT_MODE to make it
1469          acceptable to the format of ext(z)v.  */
1470       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1471         return NULL_RTX;
1472       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1473         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1474     }
1475
1476   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1477      "backwards" from the size of the unit we are extracting from.
1478      Otherwise, we count bits from the most significant on a
1479      BYTES/BITS_BIG_ENDIAN machine.  */
1480
1481   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1482     bitnum = unit - bitsize - bitnum;
1483
1484   if (target == 0)
1485     target = spec_target = gen_reg_rtx (tmode);
1486
1487   if (GET_MODE (target) != ext_mode)
1488     {
1489       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1490          between the mode of the extraction (word_mode) and the target
1491          mode.  Instead, create a temporary and use convert_move to set
1492          the target.  */
1493       if (REG_P (target)
1494           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1495         {
1496           target = gen_lowpart (ext_mode, target);
1497           if (GET_MODE_PRECISION (ext_mode)
1498               > GET_MODE_PRECISION (GET_MODE (spec_target)))
1499             spec_target_subreg = target;
1500         }
1501       else
1502         target = gen_reg_rtx (ext_mode);
1503     }
1504
1505   create_output_operand (&ops[0], target, ext_mode);
1506   create_fixed_operand (&ops[1], op0);
1507   create_integer_operand (&ops[2], bitsize);
1508   create_integer_operand (&ops[3], bitnum);
1509   if (maybe_expand_insn (extv->icode, 4, ops))
1510     {
1511       target = ops[0].value;
1512       if (target == spec_target)
1513         return target;
1514       if (target == spec_target_subreg)
1515         return spec_target;
1516       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1517     }
1518   return NULL_RTX;
1519 }
1520
1521 /* A subroutine of extract_bit_field, with the same arguments.
1522    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1523    if we can find no other means of implementing the operation.
1524    if FALLBACK_P is false, return NULL instead.  */
1525
1526 static rtx
1527 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1528                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1529                      machine_mode mode, machine_mode tmode,
1530                      bool reverse, bool fallback_p)
1531 {
1532   rtx op0 = str_rtx;
1533   machine_mode int_mode;
1534   machine_mode mode1;
1535
1536   if (tmode == VOIDmode)
1537     tmode = mode;
1538
1539   while (GET_CODE (op0) == SUBREG)
1540     {
1541       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1542       op0 = SUBREG_REG (op0);
1543     }
1544
1545   /* If we have an out-of-bounds access to a register, just return an
1546      uninitialized register of the required mode.  This can occur if the
1547      source code contains an out-of-bounds access to a small array.  */
1548   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1549     return gen_reg_rtx (tmode);
1550
1551   if (REG_P (op0)
1552       && mode == GET_MODE (op0)
1553       && bitnum == 0
1554       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1555     {
1556       if (reverse)
1557         op0 = flip_storage_order (mode, op0);
1558       /* We're trying to extract a full register from itself.  */
1559       return op0;
1560     }
1561
1562   /* See if we can get a better vector mode before extracting.  */
1563   if (VECTOR_MODE_P (GET_MODE (op0))
1564       && !MEM_P (op0)
1565       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1566     {
1567       machine_mode new_mode;
1568
1569       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1570         new_mode = MIN_MODE_VECTOR_FLOAT;
1571       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1572         new_mode = MIN_MODE_VECTOR_FRACT;
1573       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1574         new_mode = MIN_MODE_VECTOR_UFRACT;
1575       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1576         new_mode = MIN_MODE_VECTOR_ACCUM;
1577       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1578         new_mode = MIN_MODE_VECTOR_UACCUM;
1579       else
1580         new_mode = MIN_MODE_VECTOR_INT;
1581
1582       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1583         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1584             && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode)
1585             && targetm.vector_mode_supported_p (new_mode))
1586           break;
1587       if (new_mode != VOIDmode)
1588         op0 = gen_lowpart (new_mode, op0);
1589     }
1590
1591   /* Use vec_extract patterns for extracting parts of vectors whenever
1592      available.  */
1593   if (VECTOR_MODE_P (GET_MODE (op0))
1594       && !MEM_P (op0)
1595       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1596       && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1597           == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1598     {
1599       struct expand_operand ops[3];
1600       machine_mode outermode = GET_MODE (op0);
1601       machine_mode innermode = GET_MODE_INNER (outermode);
1602       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1603       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1604
1605       create_output_operand (&ops[0], target, innermode);
1606       create_input_operand (&ops[1], op0, outermode);
1607       create_integer_operand (&ops[2], pos);
1608       if (maybe_expand_insn (icode, 3, ops))
1609         {
1610           target = ops[0].value;
1611           if (GET_MODE (target) != mode)
1612             return gen_lowpart (tmode, target);
1613           return target;
1614         }
1615     }
1616
1617   /* Make sure we are playing with integral modes.  Pun with subregs
1618      if we aren't.  */
1619   {
1620     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1621     if (imode != GET_MODE (op0))
1622       {
1623         if (MEM_P (op0))
1624           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1625         else if (imode != BLKmode)
1626           {
1627             op0 = gen_lowpart (imode, op0);
1628
1629             /* If we got a SUBREG, force it into a register since we
1630                aren't going to be able to do another SUBREG on it.  */
1631             if (GET_CODE (op0) == SUBREG)
1632               op0 = force_reg (imode, op0);
1633           }
1634         else
1635           {
1636             HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1637             rtx mem = assign_stack_temp (GET_MODE (op0), size);
1638             emit_move_insn (mem, op0);
1639             op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1640           }
1641       }
1642   }
1643
1644   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1645      If that's wrong, the solution is to test for it and set TARGET to 0
1646      if needed.  */
1647
1648   /* Get the mode of the field to use for atomic access or subreg
1649      conversion.  */
1650   mode1 = mode;
1651   if (SCALAR_INT_MODE_P (tmode))
1652     {
1653       machine_mode try_mode = mode_for_size (bitsize,
1654                                                   GET_MODE_CLASS (tmode), 0);
1655       if (try_mode != BLKmode)
1656         mode1 = try_mode;
1657     }
1658   gcc_assert (mode1 != BLKmode);
1659
1660   /* Extraction of a full MODE1 value can be done with a subreg as long
1661      as the least significant bit of the value is the least significant
1662      bit of either OP0 or a word of OP0.  */
1663   if (!MEM_P (op0)
1664       && !reverse
1665       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1666       && bitsize == GET_MODE_BITSIZE (mode1)
1667       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1668     {
1669       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1670                                      bitnum / BITS_PER_UNIT);
1671       if (sub)
1672         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1673     }
1674
1675   /* Extraction of a full MODE1 value can be done with a load as long as
1676      the field is on a byte boundary and is sufficiently aligned.  */
1677   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1678     {
1679       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1680       if (reverse)
1681         op0 = flip_storage_order (mode1, op0);
1682       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1683     }
1684
1685   /* Handle fields bigger than a word.  */
1686
1687   if (bitsize > BITS_PER_WORD)
1688     {
1689       /* Here we transfer the words of the field
1690          in the order least significant first.
1691          This is because the most significant word is the one which may
1692          be less than full.  */
1693
1694       const bool backwards = WORDS_BIG_ENDIAN;
1695       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1696       unsigned int i;
1697       rtx_insn *last;
1698
1699       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1700         target = gen_reg_rtx (mode);
1701
1702       /* In case we're about to clobber a base register or something
1703          (see gcc.c-torture/execute/20040625-1.c).   */
1704       if (reg_mentioned_p (target, str_rtx))
1705         target = gen_reg_rtx (mode);
1706
1707       /* Indicate for flow that the entire target reg is being set.  */
1708       emit_clobber (target);
1709
1710       last = get_last_insn ();
1711       for (i = 0; i < nwords; i++)
1712         {
1713           /* If I is 0, use the low-order word in both field and target;
1714              if I is 1, use the next to lowest word; and so on.  */
1715           /* Word number in TARGET to use.  */
1716           unsigned int wordnum
1717             = (backwards
1718                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1719                : i);
1720           /* Offset from start of field in OP0.  */
1721           unsigned int bit_offset = (backwards ^ reverse
1722                                      ? MAX ((int) bitsize - ((int) i + 1)
1723                                             * BITS_PER_WORD,
1724                                             0)
1725                                      : (int) i * BITS_PER_WORD);
1726           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1727           rtx result_part
1728             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1729                                              bitsize - i * BITS_PER_WORD),
1730                                    bitnum + bit_offset, 1, target_part,
1731                                    mode, word_mode, reverse, fallback_p);
1732
1733           gcc_assert (target_part);
1734           if (!result_part)
1735             {
1736               delete_insns_since (last);
1737               return NULL;
1738             }
1739
1740           if (result_part != target_part)
1741             emit_move_insn (target_part, result_part);
1742         }
1743
1744       if (unsignedp)
1745         {
1746           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1747              need to be zero'd out.  */
1748           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1749             {
1750               unsigned int i, total_words;
1751
1752               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1753               for (i = nwords; i < total_words; i++)
1754                 emit_move_insn
1755                   (operand_subword (target,
1756                                     backwards ? total_words - i - 1 : i,
1757                                     1, VOIDmode),
1758                    const0_rtx);
1759             }
1760           return target;
1761         }
1762
1763       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1764       target = expand_shift (LSHIFT_EXPR, mode, target,
1765                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1766       return expand_shift (RSHIFT_EXPR, mode, target,
1767                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1768     }
1769
1770   /* If OP0 is a multi-word register, narrow it to the affected word.
1771      If the region spans two words, defer to extract_split_bit_field.  */
1772   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1773     {
1774       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1775         {
1776           if (!fallback_p)
1777             return NULL_RTX;
1778           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1779                                             reverse);
1780           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1781         }
1782       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1783                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1784       bitnum %= BITS_PER_WORD;
1785     }
1786
1787   /* From here on we know the desired field is smaller than a word.
1788      If OP0 is a register, it too fits within a word.  */
1789   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1790   extraction_insn extv;
1791   if (!MEM_P (op0)
1792       && !reverse
1793       /* ??? We could limit the structure size to the part of OP0 that
1794          contains the field, with appropriate checks for endianness
1795          and TRULY_NOOP_TRUNCATION.  */
1796       && get_best_reg_extraction_insn (&extv, pattern,
1797                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1798                                        tmode))
1799     {
1800       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1801                                                  unsignedp, target, mode,
1802                                                  tmode);
1803       if (result)
1804         return result;
1805     }
1806
1807   /* If OP0 is a memory, try copying it to a register and seeing if a
1808      cheap register alternative is available.  */
1809   if (MEM_P (op0) & !reverse)
1810     {
1811       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1812                                         tmode))
1813         {
1814           rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1815                                                      bitnum, unsignedp,
1816                                                      target, mode,
1817                                                      tmode);
1818           if (result)
1819             return result;
1820         }
1821
1822       rtx_insn *last = get_last_insn ();
1823
1824       /* Try loading part of OP0 into a register and extracting the
1825          bitfield from that.  */
1826       unsigned HOST_WIDE_INT bitpos;
1827       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1828                                                0, 0, tmode, &bitpos);
1829       if (xop0)
1830         {
1831           xop0 = copy_to_reg (xop0);
1832           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1833                                             unsignedp, target,
1834                                             mode, tmode, reverse, false);
1835           if (result)
1836             return result;
1837           delete_insns_since (last);
1838         }
1839     }
1840
1841   if (!fallback_p)
1842     return NULL;
1843
1844   /* Find a correspondingly-sized integer field, so we can apply
1845      shifts and masks to it.  */
1846   int_mode = int_mode_for_mode (tmode);
1847   if (int_mode == BLKmode)
1848     int_mode = int_mode_for_mode (mode);
1849   /* Should probably push op0 out to memory and then do a load.  */
1850   gcc_assert (int_mode != BLKmode);
1851
1852   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target,
1853                                     unsignedp, reverse);
1854
1855   /* Complex values must be reversed piecewise, so we need to undo the global
1856      reversal, convert to the complex mode and reverse again.  */
1857   if (reverse && COMPLEX_MODE_P (tmode))
1858     {
1859       target = flip_storage_order (int_mode, target);
1860       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1861       target = flip_storage_order (tmode, target);
1862     }
1863   else
1864     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1865
1866   return target;
1867 }
1868
1869 /* Generate code to extract a byte-field from STR_RTX
1870    containing BITSIZE bits, starting at BITNUM,
1871    and put it in TARGET if possible (if TARGET is nonzero).
1872    Regardless of TARGET, we return the rtx for where the value is placed.
1873
1874    STR_RTX is the structure containing the byte (a REG or MEM).
1875    UNSIGNEDP is nonzero if this is an unsigned bit field.
1876    MODE is the natural mode of the field value once extracted.
1877    TMODE is the mode the caller would like the value to have;
1878    but the value may be returned with type MODE instead.
1879
1880    If REVERSE is true, the extraction is to be done in reverse order.
1881
1882    If a TARGET is specified and we can store in it at no extra cost,
1883    we do so, and return TARGET.
1884    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1885    if they are equally easy.  */
1886
1887 rtx
1888 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1889                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1890                    machine_mode mode, machine_mode tmode, bool reverse)
1891 {
1892   machine_mode mode1;
1893
1894   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1895   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1896     mode1 = GET_MODE (str_rtx);
1897   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1898     mode1 = GET_MODE (target);
1899   else
1900     mode1 = tmode;
1901
1902   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1903     {
1904       /* Extraction of a full MODE1 value can be done with a simple load.
1905          We know here that the field can be accessed with one single
1906          instruction.  For targets that support unaligned memory,
1907          an unaligned access may be necessary.  */
1908       if (bitsize == GET_MODE_BITSIZE (mode1))
1909         {
1910           rtx result = adjust_bitfield_address (str_rtx, mode1,
1911                                                 bitnum / BITS_PER_UNIT);
1912           if (reverse)
1913             result = flip_storage_order (mode1, result);
1914           gcc_assert (bitnum % BITS_PER_UNIT == 0);
1915           return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1916         }
1917
1918       str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1919                                       &bitnum);
1920       gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1921       str_rtx = copy_to_reg (str_rtx);
1922     }
1923
1924   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1925                               target, mode, tmode, reverse, true);
1926 }
1927 \f
1928 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1929    from bit BITNUM of OP0.
1930
1931    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1932    If REVERSE is true, the extraction is to be done in reverse order.
1933
1934    If TARGET is nonzero, attempts to store the value there
1935    and return TARGET, but this is not guaranteed.
1936    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1937
1938 static rtx
1939 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1940                          unsigned HOST_WIDE_INT bitsize,
1941                          unsigned HOST_WIDE_INT bitnum, rtx target,
1942                          int unsignedp, bool reverse)
1943 {
1944   if (MEM_P (op0))
1945     {
1946       machine_mode mode
1947         = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1948                          MEM_VOLATILE_P (op0));
1949
1950       if (mode == VOIDmode)
1951         /* The only way this should occur is if the field spans word
1952            boundaries.  */
1953         return extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1954                                         reverse);
1955
1956       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1957     }
1958
1959   return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1960                                     target, unsignedp, reverse);
1961 }
1962
1963 /* Helper function for extract_fixed_bit_field, extracts
1964    the bit field always using the MODE of OP0.  */
1965
1966 static rtx
1967 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1968                            unsigned HOST_WIDE_INT bitsize,
1969                            unsigned HOST_WIDE_INT bitnum, rtx target,
1970                            int unsignedp, bool reverse)
1971 {
1972   machine_mode mode = GET_MODE (op0);
1973   gcc_assert (SCALAR_INT_MODE_P (mode));
1974
1975   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1976      for invalid input, such as extract equivalent of f5 from
1977      gcc.dg/pr48335-2.c.  */
1978
1979   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1980     /* BITNUM is the distance between our msb and that of OP0.
1981        Convert it to the distance from the lsb.  */
1982     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1983
1984   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1985      We have reduced the big-endian case to the little-endian case.  */
1986   if (reverse)
1987     op0 = flip_storage_order (mode, op0);
1988
1989   if (unsignedp)
1990     {
1991       if (bitnum)
1992         {
1993           /* If the field does not already start at the lsb,
1994              shift it so it does.  */
1995           /* Maybe propagate the target for the shift.  */
1996           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1997           if (tmode != mode)
1998             subtarget = 0;
1999           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2000         }
2001       /* Convert the value to the desired mode.  */
2002       if (mode != tmode)
2003         op0 = convert_to_mode (tmode, op0, 1);
2004
2005       /* Unless the msb of the field used to be the msb when we shifted,
2006          mask out the upper bits.  */
2007
2008       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2009         return expand_binop (GET_MODE (op0), and_optab, op0,
2010                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
2011                              target, 1, OPTAB_LIB_WIDEN);
2012       return op0;
2013     }
2014
2015   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2016      then arithmetic-shift its lsb to the lsb of the word.  */
2017   op0 = force_reg (mode, op0);
2018
2019   /* Find the narrowest integer mode that contains the field.  */
2020
2021   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
2022        mode = GET_MODE_WIDER_MODE (mode))
2023     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
2024       {
2025         op0 = convert_to_mode (mode, op0, 0);
2026         break;
2027       }
2028
2029   if (mode != tmode)
2030     target = 0;
2031
2032   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2033     {
2034       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2035       /* Maybe propagate the target for the shift.  */
2036       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2037       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2038     }
2039
2040   return expand_shift (RSHIFT_EXPR, mode, op0,
2041                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2042 }
2043
2044 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2045    VALUE << BITPOS.  */
2046
2047 static rtx
2048 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2049               int bitpos)
2050 {
2051   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2052 }
2053 \f
2054 /* Extract a bit field that is split across two words
2055    and return an RTX for the result.
2056
2057    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2058    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2059    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2060
2061    If REVERSE is true, the extraction is to be done in reverse order.  */
2062
2063 static rtx
2064 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2065                          unsigned HOST_WIDE_INT bitpos, int unsignedp,
2066                          bool reverse)
2067 {
2068   unsigned int unit;
2069   unsigned int bitsdone = 0;
2070   rtx result = NULL_RTX;
2071   int first = 1;
2072
2073   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2074      much at a time.  */
2075   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2076     unit = BITS_PER_WORD;
2077   else
2078     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2079
2080   while (bitsdone < bitsize)
2081     {
2082       unsigned HOST_WIDE_INT thissize;
2083       rtx part, word;
2084       unsigned HOST_WIDE_INT thispos;
2085       unsigned HOST_WIDE_INT offset;
2086
2087       offset = (bitpos + bitsdone) / unit;
2088       thispos = (bitpos + bitsdone) % unit;
2089
2090       /* THISSIZE must not overrun a word boundary.  Otherwise,
2091          extract_fixed_bit_field will call us again, and we will mutually
2092          recurse forever.  */
2093       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2094       thissize = MIN (thissize, unit - thispos);
2095
2096       /* If OP0 is a register, then handle OFFSET here.  */
2097       if (SUBREG_P (op0) || REG_P (op0))
2098         {
2099           word = operand_subword_force (op0, offset, GET_MODE (op0));
2100           offset = 0;
2101         }
2102       else
2103         word = op0;
2104
2105       /* Extract the parts in bit-counting order,
2106          whose meaning is determined by BYTES_PER_UNIT.
2107          OFFSET is in UNITs, and UNIT is in bits.  */
2108       part = extract_fixed_bit_field (word_mode, word, thissize,
2109                                       offset * unit + thispos, 0, 1, reverse);
2110       bitsdone += thissize;
2111
2112       /* Shift this part into place for the result.  */
2113       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2114         {
2115           if (bitsize != bitsdone)
2116             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2117                                  bitsize - bitsdone, 0, 1);
2118         }
2119       else
2120         {
2121           if (bitsdone != thissize)
2122             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2123                                  bitsdone - thissize, 0, 1);
2124         }
2125
2126       if (first)
2127         result = part;
2128       else
2129         /* Combine the parts with bitwise or.  This works
2130            because we extracted each part as an unsigned bit field.  */
2131         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2132                                OPTAB_LIB_WIDEN);
2133
2134       first = 0;
2135     }
2136
2137   /* Unsigned bit field: we are done.  */
2138   if (unsignedp)
2139     return result;
2140   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2141   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2142                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2143   return expand_shift (RSHIFT_EXPR, word_mode, result,
2144                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2145 }
2146 \f
2147 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2148    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2149    MODE, fill the upper bits with zeros.  Fail if the layout of either
2150    mode is unknown (as for CC modes) or if the extraction would involve
2151    unprofitable mode punning.  Return the value on success, otherwise
2152    return null.
2153
2154    This is different from gen_lowpart* in these respects:
2155
2156      - the returned value must always be considered an rvalue
2157
2158      - when MODE is wider than SRC_MODE, the extraction involves
2159        a zero extension
2160
2161      - when MODE is smaller than SRC_MODE, the extraction involves
2162        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2163
2164    In other words, this routine performs a computation, whereas the
2165    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2166    operations.  */
2167
2168 rtx
2169 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2170 {
2171   machine_mode int_mode, src_int_mode;
2172
2173   if (mode == src_mode)
2174     return src;
2175
2176   if (CONSTANT_P (src))
2177     {
2178       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2179          fails, it will happily create (subreg (symbol_ref)) or similar
2180          invalid SUBREGs.  */
2181       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2182       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2183       if (ret)
2184         return ret;
2185
2186       if (GET_MODE (src) == VOIDmode
2187           || !validate_subreg (mode, src_mode, src, byte))
2188         return NULL_RTX;
2189
2190       src = force_reg (GET_MODE (src), src);
2191       return gen_rtx_SUBREG (mode, src, byte);
2192     }
2193
2194   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2195     return NULL_RTX;
2196
2197   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2198       && MODES_TIEABLE_P (mode, src_mode))
2199     {
2200       rtx x = gen_lowpart_common (mode, src);
2201       if (x)
2202         return x;
2203     }
2204
2205   src_int_mode = int_mode_for_mode (src_mode);
2206   int_mode = int_mode_for_mode (mode);
2207   if (src_int_mode == BLKmode || int_mode == BLKmode)
2208     return NULL_RTX;
2209
2210   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2211     return NULL_RTX;
2212   if (!MODES_TIEABLE_P (int_mode, mode))
2213     return NULL_RTX;
2214
2215   src = gen_lowpart (src_int_mode, src);
2216   src = convert_modes (int_mode, src_int_mode, src, true);
2217   src = gen_lowpart (mode, src);
2218   return src;
2219 }
2220 \f
2221 /* Add INC into TARGET.  */
2222
2223 void
2224 expand_inc (rtx target, rtx inc)
2225 {
2226   rtx value = expand_binop (GET_MODE (target), add_optab,
2227                             target, inc,
2228                             target, 0, OPTAB_LIB_WIDEN);
2229   if (value != target)
2230     emit_move_insn (target, value);
2231 }
2232
2233 /* Subtract DEC from TARGET.  */
2234
2235 void
2236 expand_dec (rtx target, rtx dec)
2237 {
2238   rtx value = expand_binop (GET_MODE (target), sub_optab,
2239                             target, dec,
2240                             target, 0, OPTAB_LIB_WIDEN);
2241   if (value != target)
2242     emit_move_insn (target, value);
2243 }
2244 \f
2245 /* Output a shift instruction for expression code CODE,
2246    with SHIFTED being the rtx for the value to shift,
2247    and AMOUNT the rtx for the amount to shift by.
2248    Store the result in the rtx TARGET, if that is convenient.
2249    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2250    Return the rtx for where the value is.  */
2251
2252 static rtx
2253 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2254                 rtx amount, rtx target, int unsignedp)
2255 {
2256   rtx op1, temp = 0;
2257   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2258   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2259   optab lshift_optab = ashl_optab;
2260   optab rshift_arith_optab = ashr_optab;
2261   optab rshift_uns_optab = lshr_optab;
2262   optab lrotate_optab = rotl_optab;
2263   optab rrotate_optab = rotr_optab;
2264   machine_mode op1_mode;
2265   machine_mode scalar_mode = mode;
2266   int attempt;
2267   bool speed = optimize_insn_for_speed_p ();
2268
2269   if (VECTOR_MODE_P (mode))
2270     scalar_mode = GET_MODE_INNER (mode);
2271   op1 = amount;
2272   op1_mode = GET_MODE (op1);
2273
2274   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2275      shift amount is a vector, use the vector/vector shift patterns.  */
2276   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2277     {
2278       lshift_optab = vashl_optab;
2279       rshift_arith_optab = vashr_optab;
2280       rshift_uns_optab = vlshr_optab;
2281       lrotate_optab = vrotl_optab;
2282       rrotate_optab = vrotr_optab;
2283     }
2284
2285   /* Previously detected shift-counts computed by NEGATE_EXPR
2286      and shifted in the other direction; but that does not work
2287      on all machines.  */
2288
2289   if (SHIFT_COUNT_TRUNCATED)
2290     {
2291       if (CONST_INT_P (op1)
2292           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2293               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2294         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2295                        % GET_MODE_BITSIZE (scalar_mode));
2296       else if (GET_CODE (op1) == SUBREG
2297                && subreg_lowpart_p (op1)
2298                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2299                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2300         op1 = SUBREG_REG (op1);
2301     }
2302
2303   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2304      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2305      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2306      amount instead.  */
2307   if (rotate
2308       && CONST_INT_P (op1)
2309       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2310                    GET_MODE_BITSIZE (scalar_mode) - 1))
2311     {
2312       op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2313       left = !left;
2314       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2315     }
2316
2317   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2318      Note that this is not the case for bigger values.  For instance a rotation
2319      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2320      0x04030201 (bswapsi).  */
2321   if (rotate
2322       && CONST_INT_P (op1)
2323       && INTVAL (op1) == BITS_PER_UNIT
2324       && GET_MODE_SIZE (scalar_mode) == 2
2325       && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2326     return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2327                                   unsignedp);
2328
2329   if (op1 == const0_rtx)
2330     return shifted;
2331
2332   /* Check whether its cheaper to implement a left shift by a constant
2333      bit count by a sequence of additions.  */
2334   if (code == LSHIFT_EXPR
2335       && CONST_INT_P (op1)
2336       && INTVAL (op1) > 0
2337       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2338       && INTVAL (op1) < MAX_BITS_PER_WORD
2339       && (shift_cost (speed, mode, INTVAL (op1))
2340           > INTVAL (op1) * add_cost (speed, mode))
2341       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2342     {
2343       int i;
2344       for (i = 0; i < INTVAL (op1); i++)
2345         {
2346           temp = force_reg (mode, shifted);
2347           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2348                                   unsignedp, OPTAB_LIB_WIDEN);
2349         }
2350       return shifted;
2351     }
2352
2353   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2354     {
2355       enum optab_methods methods;
2356
2357       if (attempt == 0)
2358         methods = OPTAB_DIRECT;
2359       else if (attempt == 1)
2360         methods = OPTAB_WIDEN;
2361       else
2362         methods = OPTAB_LIB_WIDEN;
2363
2364       if (rotate)
2365         {
2366           /* Widening does not work for rotation.  */
2367           if (methods == OPTAB_WIDEN)
2368             continue;
2369           else if (methods == OPTAB_LIB_WIDEN)
2370             {
2371               /* If we have been unable to open-code this by a rotation,
2372                  do it as the IOR of two shifts.  I.e., to rotate A
2373                  by N bits, compute
2374                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2375                  where C is the bitsize of A.
2376
2377                  It is theoretically possible that the target machine might
2378                  not be able to perform either shift and hence we would
2379                  be making two libcalls rather than just the one for the
2380                  shift (similarly if IOR could not be done).  We will allow
2381                  this extremely unlikely lossage to avoid complicating the
2382                  code below.  */
2383
2384               rtx subtarget = target == shifted ? 0 : target;
2385               rtx new_amount, other_amount;
2386               rtx temp1;
2387
2388               new_amount = op1;
2389               if (op1 == const0_rtx)
2390                 return shifted;
2391               else if (CONST_INT_P (op1))
2392                 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2393                                         - INTVAL (op1));
2394               else
2395                 {
2396                   other_amount
2397                     = simplify_gen_unary (NEG, GET_MODE (op1),
2398                                           op1, GET_MODE (op1));
2399                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2400                   other_amount
2401                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2402                                            gen_int_mode (mask, GET_MODE (op1)));
2403                 }
2404
2405               shifted = force_reg (mode, shifted);
2406
2407               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2408                                      mode, shifted, new_amount, 0, 1);
2409               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2410                                       mode, shifted, other_amount,
2411                                       subtarget, 1);
2412               return expand_binop (mode, ior_optab, temp, temp1, target,
2413                                    unsignedp, methods);
2414             }
2415
2416           temp = expand_binop (mode,
2417                                left ? lrotate_optab : rrotate_optab,
2418                                shifted, op1, target, unsignedp, methods);
2419         }
2420       else if (unsignedp)
2421         temp = expand_binop (mode,
2422                              left ? lshift_optab : rshift_uns_optab,
2423                              shifted, op1, target, unsignedp, methods);
2424
2425       /* Do arithmetic shifts.
2426          Also, if we are going to widen the operand, we can just as well
2427          use an arithmetic right-shift instead of a logical one.  */
2428       if (temp == 0 && ! rotate
2429           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2430         {
2431           enum optab_methods methods1 = methods;
2432
2433           /* If trying to widen a log shift to an arithmetic shift,
2434              don't accept an arithmetic shift of the same size.  */
2435           if (unsignedp)
2436             methods1 = OPTAB_MUST_WIDEN;
2437
2438           /* Arithmetic shift */
2439
2440           temp = expand_binop (mode,
2441                                left ? lshift_optab : rshift_arith_optab,
2442                                shifted, op1, target, unsignedp, methods1);
2443         }
2444
2445       /* We used to try extzv here for logical right shifts, but that was
2446          only useful for one machine, the VAX, and caused poor code
2447          generation there for lshrdi3, so the code was deleted and a
2448          define_expand for lshrsi3 was added to vax.md.  */
2449     }
2450
2451   gcc_assert (temp);
2452   return temp;
2453 }
2454
2455 /* Output a shift instruction for expression code CODE,
2456    with SHIFTED being the rtx for the value to shift,
2457    and AMOUNT the amount to shift by.
2458    Store the result in the rtx TARGET, if that is convenient.
2459    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2460    Return the rtx for where the value is.  */
2461
2462 rtx
2463 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2464               int amount, rtx target, int unsignedp)
2465 {
2466   return expand_shift_1 (code, mode,
2467                          shifted, GEN_INT (amount), target, unsignedp);
2468 }
2469
2470 /* Output a shift instruction for expression code CODE,
2471    with SHIFTED being the rtx for the value to shift,
2472    and AMOUNT the tree for the amount to shift by.
2473    Store the result in the rtx TARGET, if that is convenient.
2474    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2475    Return the rtx for where the value is.  */
2476
2477 rtx
2478 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2479                        tree amount, rtx target, int unsignedp)
2480 {
2481   return expand_shift_1 (code, mode,
2482                          shifted, expand_normal (amount), target, unsignedp);
2483 }
2484
2485 \f
2486 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2487                         const struct mult_cost *, machine_mode mode);
2488 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2489                               const struct algorithm *, enum mult_variant);
2490 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2491 static rtx extract_high_half (machine_mode, rtx);
2492 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2493 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2494                                        int, int);
2495 /* Compute and return the best algorithm for multiplying by T.
2496    The algorithm must cost less than cost_limit
2497    If retval.cost >= COST_LIMIT, no algorithm was found and all
2498    other field of the returned struct are undefined.
2499    MODE is the machine mode of the multiplication.  */
2500
2501 static void
2502 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2503             const struct mult_cost *cost_limit, machine_mode mode)
2504 {
2505   int m;
2506   struct algorithm *alg_in, *best_alg;
2507   struct mult_cost best_cost;
2508   struct mult_cost new_limit;
2509   int op_cost, op_latency;
2510   unsigned HOST_WIDE_INT orig_t = t;
2511   unsigned HOST_WIDE_INT q;
2512   int maxm, hash_index;
2513   bool cache_hit = false;
2514   enum alg_code cache_alg = alg_zero;
2515   bool speed = optimize_insn_for_speed_p ();
2516   machine_mode imode;
2517   struct alg_hash_entry *entry_ptr;
2518
2519   /* Indicate that no algorithm is yet found.  If no algorithm
2520      is found, this value will be returned and indicate failure.  */
2521   alg_out->cost.cost = cost_limit->cost + 1;
2522   alg_out->cost.latency = cost_limit->latency + 1;
2523
2524   if (cost_limit->cost < 0
2525       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2526     return;
2527
2528   /* Be prepared for vector modes.  */
2529   imode = GET_MODE_INNER (mode);
2530
2531   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2532
2533   /* Restrict the bits of "t" to the multiplication's mode.  */
2534   t &= GET_MODE_MASK (imode);
2535
2536   /* t == 1 can be done in zero cost.  */
2537   if (t == 1)
2538     {
2539       alg_out->ops = 1;
2540       alg_out->cost.cost = 0;
2541       alg_out->cost.latency = 0;
2542       alg_out->op[0] = alg_m;
2543       return;
2544     }
2545
2546   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2547      fail now.  */
2548   if (t == 0)
2549     {
2550       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2551         return;
2552       else
2553         {
2554           alg_out->ops = 1;
2555           alg_out->cost.cost = zero_cost (speed);
2556           alg_out->cost.latency = zero_cost (speed);
2557           alg_out->op[0] = alg_zero;
2558           return;
2559         }
2560     }
2561
2562   /* We'll be needing a couple extra algorithm structures now.  */
2563
2564   alg_in = XALLOCA (struct algorithm);
2565   best_alg = XALLOCA (struct algorithm);
2566   best_cost = *cost_limit;
2567
2568   /* Compute the hash index.  */
2569   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2570
2571   /* See if we already know what to do for T.  */
2572   entry_ptr = alg_hash_entry_ptr (hash_index);
2573   if (entry_ptr->t == t
2574       && entry_ptr->mode == mode
2575       && entry_ptr->mode == mode
2576       && entry_ptr->speed == speed
2577       && entry_ptr->alg != alg_unknown)
2578     {
2579       cache_alg = entry_ptr->alg;
2580
2581       if (cache_alg == alg_impossible)
2582         {
2583           /* The cache tells us that it's impossible to synthesize
2584              multiplication by T within entry_ptr->cost.  */
2585           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2586             /* COST_LIMIT is at least as restrictive as the one
2587                recorded in the hash table, in which case we have no
2588                hope of synthesizing a multiplication.  Just
2589                return.  */
2590             return;
2591
2592           /* If we get here, COST_LIMIT is less restrictive than the
2593              one recorded in the hash table, so we may be able to
2594              synthesize a multiplication.  Proceed as if we didn't
2595              have the cache entry.  */
2596         }
2597       else
2598         {
2599           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2600             /* The cached algorithm shows that this multiplication
2601                requires more cost than COST_LIMIT.  Just return.  This
2602                way, we don't clobber this cache entry with
2603                alg_impossible but retain useful information.  */
2604             return;
2605
2606           cache_hit = true;
2607
2608           switch (cache_alg)
2609             {
2610             case alg_shift:
2611               goto do_alg_shift;
2612
2613             case alg_add_t_m2:
2614             case alg_sub_t_m2:
2615               goto do_alg_addsub_t_m2;
2616
2617             case alg_add_factor:
2618             case alg_sub_factor:
2619               goto do_alg_addsub_factor;
2620
2621             case alg_add_t2_m:
2622               goto do_alg_add_t2_m;
2623
2624             case alg_sub_t2_m:
2625               goto do_alg_sub_t2_m;
2626
2627             default:
2628               gcc_unreachable ();
2629             }
2630         }
2631     }
2632
2633   /* If we have a group of zero bits at the low-order part of T, try
2634      multiplying by the remaining bits and then doing a shift.  */
2635
2636   if ((t & 1) == 0)
2637     {
2638     do_alg_shift:
2639       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2640       if (m < maxm)
2641         {
2642           q = t >> m;
2643           /* The function expand_shift will choose between a shift and
2644              a sequence of additions, so the observed cost is given as
2645              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2646           op_cost = m * add_cost (speed, mode);
2647           if (shift_cost (speed, mode, m) < op_cost)
2648             op_cost = shift_cost (speed, mode, m);
2649           new_limit.cost = best_cost.cost - op_cost;
2650           new_limit.latency = best_cost.latency - op_cost;
2651           synth_mult (alg_in, q, &new_limit, mode);
2652
2653           alg_in->cost.cost += op_cost;
2654           alg_in->cost.latency += op_cost;
2655           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2656             {
2657               best_cost = alg_in->cost;
2658               std::swap (alg_in, best_alg);
2659               best_alg->log[best_alg->ops] = m;
2660               best_alg->op[best_alg->ops] = alg_shift;
2661             }
2662
2663           /* See if treating ORIG_T as a signed number yields a better
2664              sequence.  Try this sequence only for a negative ORIG_T
2665              as it would be useless for a non-negative ORIG_T.  */
2666           if ((HOST_WIDE_INT) orig_t < 0)
2667             {
2668               /* Shift ORIG_T as follows because a right shift of a
2669                  negative-valued signed type is implementation
2670                  defined.  */
2671               q = ~(~orig_t >> m);
2672               /* The function expand_shift will choose between a shift
2673                  and a sequence of additions, so the observed cost is
2674                  given as MIN (m * add_cost(speed, mode),
2675                  shift_cost(speed, mode, m)).  */
2676               op_cost = m * add_cost (speed, mode);
2677               if (shift_cost (speed, mode, m) < op_cost)
2678                 op_cost = shift_cost (speed, mode, m);
2679               new_limit.cost = best_cost.cost - op_cost;
2680               new_limit.latency = best_cost.latency - op_cost;
2681               synth_mult (alg_in, q, &new_limit, mode);
2682
2683               alg_in->cost.cost += op_cost;
2684               alg_in->cost.latency += op_cost;
2685               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2686                 {
2687                   best_cost = alg_in->cost;
2688                   std::swap (alg_in, best_alg);
2689                   best_alg->log[best_alg->ops] = m;
2690                   best_alg->op[best_alg->ops] = alg_shift;
2691                 }
2692             }
2693         }
2694       if (cache_hit)
2695         goto done;
2696     }
2697
2698   /* If we have an odd number, add or subtract one.  */
2699   if ((t & 1) != 0)
2700     {
2701       unsigned HOST_WIDE_INT w;
2702
2703     do_alg_addsub_t_m2:
2704       for (w = 1; (w & t) != 0; w <<= 1)
2705         ;
2706       /* If T was -1, then W will be zero after the loop.  This is another
2707          case where T ends with ...111.  Handling this with (T + 1) and
2708          subtract 1 produces slightly better code and results in algorithm
2709          selection much faster than treating it like the ...0111 case
2710          below.  */
2711       if (w == 0
2712           || (w > 2
2713               /* Reject the case where t is 3.
2714                  Thus we prefer addition in that case.  */
2715               && t != 3))
2716         {
2717           /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */
2718
2719           op_cost = add_cost (speed, mode);
2720           new_limit.cost = best_cost.cost - op_cost;
2721           new_limit.latency = best_cost.latency - op_cost;
2722           synth_mult (alg_in, t + 1, &new_limit, mode);
2723
2724           alg_in->cost.cost += op_cost;
2725           alg_in->cost.latency += op_cost;
2726           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2727             {
2728               best_cost = alg_in->cost;
2729               std::swap (alg_in, best_alg);
2730               best_alg->log[best_alg->ops] = 0;
2731               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2732             }
2733         }
2734       else
2735         {
2736           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */
2737
2738           op_cost = add_cost (speed, mode);
2739           new_limit.cost = best_cost.cost - op_cost;
2740           new_limit.latency = best_cost.latency - op_cost;
2741           synth_mult (alg_in, t - 1, &new_limit, mode);
2742
2743           alg_in->cost.cost += op_cost;
2744           alg_in->cost.latency += op_cost;
2745           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2746             {
2747               best_cost = alg_in->cost;
2748               std::swap (alg_in, best_alg);
2749               best_alg->log[best_alg->ops] = 0;
2750               best_alg->op[best_alg->ops] = alg_add_t_m2;
2751             }
2752         }
2753
2754       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2755          quickly with a - a * n for some appropriate constant n.  */
2756       m = exact_log2 (-orig_t + 1);
2757       if (m >= 0 && m < maxm)
2758         {
2759           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2760           /* If the target has a cheap shift-and-subtract insn use
2761              that in preference to a shift insn followed by a sub insn.
2762              Assume that the shift-and-sub is "atomic" with a latency
2763              equal to it's cost, otherwise assume that on superscalar
2764              hardware the shift may be executed concurrently with the
2765              earlier steps in the algorithm.  */
2766           if (shiftsub1_cost (speed, mode, m) <= op_cost)
2767             {
2768               op_cost = shiftsub1_cost (speed, mode, m);
2769               op_latency = op_cost;
2770             }
2771           else
2772             op_latency = add_cost (speed, mode);
2773
2774           new_limit.cost = best_cost.cost - op_cost;
2775           new_limit.latency = best_cost.latency - op_latency;
2776           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2777                       &new_limit, mode);
2778
2779           alg_in->cost.cost += op_cost;
2780           alg_in->cost.latency += op_latency;
2781           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2782             {
2783               best_cost = alg_in->cost;
2784               std::swap (alg_in, best_alg);
2785               best_alg->log[best_alg->ops] = m;
2786               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2787             }
2788         }
2789
2790       if (cache_hit)
2791         goto done;
2792     }
2793
2794   /* Look for factors of t of the form
2795      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2796      If we find such a factor, we can multiply by t using an algorithm that
2797      multiplies by q, shift the result by m and add/subtract it to itself.
2798
2799      We search for large factors first and loop down, even if large factors
2800      are less probable than small; if we find a large factor we will find a
2801      good sequence quickly, and therefore be able to prune (by decreasing
2802      COST_LIMIT) the search.  */
2803
2804  do_alg_addsub_factor:
2805   for (m = floor_log2 (t - 1); m >= 2; m--)
2806     {
2807       unsigned HOST_WIDE_INT d;
2808
2809       d = (HOST_WIDE_INT_1U << m) + 1;
2810       if (t % d == 0 && t > d && m < maxm
2811           && (!cache_hit || cache_alg == alg_add_factor))
2812         {
2813           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2814           if (shiftadd_cost (speed, mode, m) <= op_cost)
2815             op_cost = shiftadd_cost (speed, mode, m);
2816
2817           op_latency = op_cost;
2818
2819
2820           new_limit.cost = best_cost.cost - op_cost;
2821           new_limit.latency = best_cost.latency - op_latency;
2822           synth_mult (alg_in, t / d, &new_limit, mode);
2823
2824           alg_in->cost.cost += op_cost;
2825           alg_in->cost.latency += op_latency;
2826           if (alg_in->cost.latency < op_cost)
2827             alg_in->cost.latency = op_cost;
2828           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2829             {
2830               best_cost = alg_in->cost;
2831               std::swap (alg_in, best_alg);
2832               best_alg->log[best_alg->ops] = m;
2833               best_alg->op[best_alg->ops] = alg_add_factor;
2834             }
2835           /* Other factors will have been taken care of in the recursion.  */
2836           break;
2837         }
2838
2839       d = (HOST_WIDE_INT_1U << m) - 1;
2840       if (t % d == 0 && t > d && m < maxm
2841           && (!cache_hit || cache_alg == alg_sub_factor))
2842         {
2843           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2844           if (shiftsub0_cost (speed, mode, m) <= op_cost)
2845             op_cost = shiftsub0_cost (speed, mode, m);
2846
2847           op_latency = op_cost;
2848
2849           new_limit.cost = best_cost.cost - op_cost;
2850           new_limit.latency = best_cost.latency - op_latency;
2851           synth_mult (alg_in, t / d, &new_limit, mode);
2852
2853           alg_in->cost.cost += op_cost;
2854           alg_in->cost.latency += op_latency;
2855           if (alg_in->cost.latency < op_cost)
2856             alg_in->cost.latency = op_cost;
2857           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2858             {
2859               best_cost = alg_in->cost;
2860               std::swap (alg_in, best_alg);
2861               best_alg->log[best_alg->ops] = m;
2862               best_alg->op[best_alg->ops] = alg_sub_factor;
2863             }
2864           break;
2865         }
2866     }
2867   if (cache_hit)
2868     goto done;
2869
2870   /* Try shift-and-add (load effective address) instructions,
2871      i.e. do a*3, a*5, a*9.  */
2872   if ((t & 1) != 0)
2873     {
2874     do_alg_add_t2_m:
2875       q = t - 1;
2876       q = q & -q;
2877       m = exact_log2 (q);
2878       if (m >= 0 && m < maxm)
2879         {
2880           op_cost = shiftadd_cost (speed, mode, m);
2881           new_limit.cost = best_cost.cost - op_cost;
2882           new_limit.latency = best_cost.latency - op_cost;
2883           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2884
2885           alg_in->cost.cost += op_cost;
2886           alg_in->cost.latency += op_cost;
2887           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2888             {
2889               best_cost = alg_in->cost;
2890               std::swap (alg_in, best_alg);
2891               best_alg->log[best_alg->ops] = m;
2892               best_alg->op[best_alg->ops] = alg_add_t2_m;
2893             }
2894         }
2895       if (cache_hit)
2896         goto done;
2897
2898     do_alg_sub_t2_m:
2899       q = t + 1;
2900       q = q & -q;
2901       m = exact_log2 (q);
2902       if (m >= 0 && m < maxm)
2903         {
2904           op_cost = shiftsub0_cost (speed, mode, m);
2905           new_limit.cost = best_cost.cost - op_cost;
2906           new_limit.latency = best_cost.latency - op_cost;
2907           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2908
2909           alg_in->cost.cost += op_cost;
2910           alg_in->cost.latency += op_cost;
2911           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2912             {
2913               best_cost = alg_in->cost;
2914               std::swap (alg_in, best_alg);
2915               best_alg->log[best_alg->ops] = m;
2916               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2917             }
2918         }
2919       if (cache_hit)
2920         goto done;
2921     }
2922
2923  done:
2924   /* If best_cost has not decreased, we have not found any algorithm.  */
2925   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2926     {
2927       /* We failed to find an algorithm.  Record alg_impossible for
2928          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2929          we are asked to find an algorithm for T within the same or
2930          lower COST_LIMIT, we can immediately return to the
2931          caller.  */
2932       entry_ptr->t = t;
2933       entry_ptr->mode = mode;
2934       entry_ptr->speed = speed;
2935       entry_ptr->alg = alg_impossible;
2936       entry_ptr->cost = *cost_limit;
2937       return;
2938     }
2939
2940   /* Cache the result.  */
2941   if (!cache_hit)
2942     {
2943       entry_ptr->t = t;
2944       entry_ptr->mode = mode;
2945       entry_ptr->speed = speed;
2946       entry_ptr->alg = best_alg->op[best_alg->ops];
2947       entry_ptr->cost.cost = best_cost.cost;
2948       entry_ptr->cost.latency = best_cost.latency;
2949     }
2950
2951   /* If we are getting a too long sequence for `struct algorithm'
2952      to record, make this search fail.  */
2953   if (best_alg->ops == MAX_BITS_PER_WORD)
2954     return;
2955
2956   /* Copy the algorithm from temporary space to the space at alg_out.
2957      We avoid using structure assignment because the majority of
2958      best_alg is normally undefined, and this is a critical function.  */
2959   alg_out->ops = best_alg->ops + 1;
2960   alg_out->cost = best_cost;
2961   memcpy (alg_out->op, best_alg->op,
2962           alg_out->ops * sizeof *alg_out->op);
2963   memcpy (alg_out->log, best_alg->log,
2964           alg_out->ops * sizeof *alg_out->log);
2965 }
2966 \f
2967 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2968    Try three variations:
2969
2970        - a shift/add sequence based on VAL itself
2971        - a shift/add sequence based on -VAL, followed by a negation
2972        - a shift/add sequence based on VAL - 1, followed by an addition.
2973
2974    Return true if the cheapest of these cost less than MULT_COST,
2975    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2976
2977 bool
2978 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2979                      struct algorithm *alg, enum mult_variant *variant,
2980                      int mult_cost)
2981 {
2982   struct algorithm alg2;
2983   struct mult_cost limit;
2984   int op_cost;
2985   bool speed = optimize_insn_for_speed_p ();
2986
2987   /* Fail quickly for impossible bounds.  */
2988   if (mult_cost < 0)
2989     return false;
2990
2991   /* Ensure that mult_cost provides a reasonable upper bound.
2992      Any constant multiplication can be performed with less
2993      than 2 * bits additions.  */
2994   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2995   if (mult_cost > op_cost)
2996     mult_cost = op_cost;
2997
2998   *variant = basic_variant;
2999   limit.cost = mult_cost;
3000   limit.latency = mult_cost;
3001   synth_mult (alg, val, &limit, mode);
3002
3003   /* This works only if the inverted value actually fits in an
3004      `unsigned int' */
3005   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3006     {
3007       op_cost = neg_cost (speed, mode);
3008       if (MULT_COST_LESS (&alg->cost, mult_cost))
3009         {
3010           limit.cost = alg->cost.cost - op_cost;
3011           limit.latency = alg->cost.latency - op_cost;
3012         }
3013       else
3014         {
3015           limit.cost = mult_cost - op_cost;
3016           limit.latency = mult_cost - op_cost;
3017         }
3018
3019       synth_mult (&alg2, -val, &limit, mode);
3020       alg2.cost.cost += op_cost;
3021       alg2.cost.latency += op_cost;
3022       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3023         *alg = alg2, *variant = negate_variant;
3024     }
3025
3026   /* This proves very useful for division-by-constant.  */
3027   op_cost = add_cost (speed, mode);
3028   if (MULT_COST_LESS (&alg->cost, mult_cost))
3029     {
3030       limit.cost = alg->cost.cost - op_cost;
3031       limit.latency = alg->cost.latency - op_cost;
3032     }
3033   else
3034     {
3035       limit.cost = mult_cost - op_cost;
3036       limit.latency = mult_cost - op_cost;
3037     }
3038
3039   synth_mult (&alg2, val - 1, &limit, mode);
3040   alg2.cost.cost += op_cost;
3041   alg2.cost.latency += op_cost;
3042   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3043     *alg = alg2, *variant = add_variant;
3044
3045   return MULT_COST_LESS (&alg->cost, mult_cost);
3046 }
3047
3048 /* A subroutine of expand_mult, used for constant multiplications.
3049    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3050    convenient.  Use the shift/add sequence described by ALG and apply
3051    the final fixup specified by VARIANT.  */
3052
3053 static rtx
3054 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3055                    rtx target, const struct algorithm *alg,
3056                    enum mult_variant variant)
3057 {
3058   HOST_WIDE_INT val_so_far;
3059   rtx_insn *insn;
3060   rtx accum, tem;
3061   int opno;
3062   machine_mode nmode;
3063
3064   /* Avoid referencing memory over and over and invalid sharing
3065      on SUBREGs.  */
3066   op0 = force_reg (mode, op0);
3067
3068   /* ACCUM starts out either as OP0 or as a zero, depending on
3069      the first operation.  */
3070
3071   if (alg->op[0] == alg_zero)
3072     {
3073       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3074       val_so_far = 0;
3075     }
3076   else if (alg->op[0] == alg_m)
3077     {
3078       accum = copy_to_mode_reg (mode, op0);
3079       val_so_far = 1;
3080     }
3081   else
3082     gcc_unreachable ();
3083
3084   for (opno = 1; opno < alg->ops; opno++)
3085     {
3086       int log = alg->log[opno];
3087       rtx shift_subtarget = optimize ? 0 : accum;
3088       rtx add_target
3089         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3090            && !optimize)
3091           ? target : 0;
3092       rtx accum_target = optimize ? 0 : accum;
3093       rtx accum_inner;
3094
3095       switch (alg->op[opno])
3096         {
3097         case alg_shift:
3098           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3099           /* REG_EQUAL note will be attached to the following insn.  */
3100           emit_move_insn (accum, tem);
3101           val_so_far <<= log;
3102           break;
3103
3104         case alg_add_t_m2:
3105           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3106           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3107                                  add_target ? add_target : accum_target);
3108           val_so_far += HOST_WIDE_INT_1 << log;
3109           break;
3110
3111         case alg_sub_t_m2:
3112           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3113           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3114                                  add_target ? add_target : accum_target);
3115           val_so_far -= HOST_WIDE_INT_1 << log;
3116           break;
3117
3118         case alg_add_t2_m:
3119           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3120                                 log, shift_subtarget, 0);
3121           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3122                                  add_target ? add_target : accum_target);
3123           val_so_far = (val_so_far << log) + 1;
3124           break;
3125
3126         case alg_sub_t2_m:
3127           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3128                                 log, shift_subtarget, 0);
3129           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3130                                  add_target ? add_target : accum_target);
3131           val_so_far = (val_so_far << log) - 1;
3132           break;
3133
3134         case alg_add_factor:
3135           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3136           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3137                                  add_target ? add_target : accum_target);
3138           val_so_far += val_so_far << log;
3139           break;
3140
3141         case alg_sub_factor:
3142           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3143           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3144                                  (add_target
3145                                   ? add_target : (optimize ? 0 : tem)));
3146           val_so_far = (val_so_far << log) - val_so_far;
3147           break;
3148
3149         default:
3150           gcc_unreachable ();
3151         }
3152
3153       if (SCALAR_INT_MODE_P (mode))
3154         {
3155           /* Write a REG_EQUAL note on the last insn so that we can cse
3156              multiplication sequences.  Note that if ACCUM is a SUBREG,
3157              we've set the inner register and must properly indicate that.  */
3158           tem = op0, nmode = mode;
3159           accum_inner = accum;
3160           if (GET_CODE (accum) == SUBREG)
3161             {
3162               accum_inner = SUBREG_REG (accum);
3163               nmode = GET_MODE (accum_inner);
3164               tem = gen_lowpart (nmode, op0);
3165             }
3166
3167           insn = get_last_insn ();
3168           set_dst_reg_note (insn, REG_EQUAL,
3169                             gen_rtx_MULT (nmode, tem,
3170                                           gen_int_mode (val_so_far, nmode)),
3171                             accum_inner);
3172         }
3173     }
3174
3175   if (variant == negate_variant)
3176     {
3177       val_so_far = -val_so_far;
3178       accum = expand_unop (mode, neg_optab, accum, target, 0);
3179     }
3180   else if (variant == add_variant)
3181     {
3182       val_so_far = val_so_far + 1;
3183       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3184     }
3185
3186   /* Compare only the bits of val and val_so_far that are significant
3187      in the result mode, to avoid sign-/zero-extension confusion.  */
3188   nmode = GET_MODE_INNER (mode);
3189   val &= GET_MODE_MASK (nmode);
3190   val_so_far &= GET_MODE_MASK (nmode);
3191   gcc_assert (val == val_so_far);
3192
3193   return accum;
3194 }
3195
3196 /* Perform a multiplication and return an rtx for the result.
3197    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3198    TARGET is a suggestion for where to store the result (an rtx).
3199
3200    We check specially for a constant integer as OP1.
3201    If you want this check for OP0 as well, then before calling
3202    you should swap the two operands if OP0 would be constant.  */
3203
3204 rtx
3205 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3206              int unsignedp)
3207 {
3208   enum mult_variant variant;
3209   struct algorithm algorithm;
3210   rtx scalar_op1;
3211   int max_cost;
3212   bool speed = optimize_insn_for_speed_p ();
3213   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3214
3215   if (CONSTANT_P (op0))
3216     std::swap (op0, op1);
3217
3218   /* For vectors, there are several simplifications that can be made if
3219      all elements of the vector constant are identical.  */
3220   scalar_op1 = unwrap_const_vec_duplicate (op1);
3221
3222   if (INTEGRAL_MODE_P (mode))
3223     {
3224       rtx fake_reg;
3225       HOST_WIDE_INT coeff;
3226       bool is_neg;
3227       int mode_bitsize;
3228
3229       if (op1 == CONST0_RTX (mode))
3230         return op1;
3231       if (op1 == CONST1_RTX (mode))
3232         return op0;
3233       if (op1 == CONSTM1_RTX (mode))
3234         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3235                             op0, target, 0);
3236
3237       if (do_trapv)
3238         goto skip_synth;
3239
3240       /* If mode is integer vector mode, check if the backend supports
3241          vector lshift (by scalar or vector) at all.  If not, we can't use
3242          synthetized multiply.  */
3243       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3244           && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3245           && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3246         goto skip_synth;
3247
3248       /* These are the operations that are potentially turned into
3249          a sequence of shifts and additions.  */
3250       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3251
3252       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3253          less than or equal in size to `unsigned int' this doesn't matter.
3254          If the mode is larger than `unsigned int', then synth_mult works
3255          only if the constant value exactly fits in an `unsigned int' without
3256          any truncation.  This means that multiplying by negative values does
3257          not work; results are off by 2^32 on a 32 bit machine.  */
3258       if (CONST_INT_P (scalar_op1))
3259         {
3260           coeff = INTVAL (scalar_op1);
3261           is_neg = coeff < 0;
3262         }
3263 #if TARGET_SUPPORTS_WIDE_INT
3264       else if (CONST_WIDE_INT_P (scalar_op1))
3265 #else
3266       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3267 #endif
3268         {
3269           int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3270           /* Perfect power of 2 (other than 1, which is handled above).  */
3271           if (shift > 0)
3272             return expand_shift (LSHIFT_EXPR, mode, op0,
3273                                  shift, target, unsignedp);
3274           else
3275             goto skip_synth;
3276         }
3277       else
3278         goto skip_synth;
3279
3280       /* We used to test optimize here, on the grounds that it's better to
3281          produce a smaller program when -O is not used.  But this causes
3282          such a terrible slowdown sometimes that it seems better to always
3283          use synth_mult.  */
3284
3285       /* Special case powers of two.  */
3286       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3287           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3288         return expand_shift (LSHIFT_EXPR, mode, op0,
3289                              floor_log2 (coeff), target, unsignedp);
3290
3291       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3292
3293       /* Attempt to handle multiplication of DImode values by negative
3294          coefficients, by performing the multiplication by a positive
3295          multiplier and then inverting the result.  */
3296       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3297         {
3298           /* Its safe to use -coeff even for INT_MIN, as the
3299              result is interpreted as an unsigned coefficient.
3300              Exclude cost of op0 from max_cost to match the cost
3301              calculation of the synth_mult.  */
3302           coeff = -(unsigned HOST_WIDE_INT) coeff;
3303           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3304                                     mode, speed)
3305                       - neg_cost (speed, mode));
3306           if (max_cost <= 0)
3307             goto skip_synth;
3308
3309           /* Special case powers of two.  */
3310           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3311             {
3312               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3313                                        floor_log2 (coeff), target, unsignedp);
3314               return expand_unop (mode, neg_optab, temp, target, 0);
3315             }
3316
3317           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3318                                    max_cost))
3319             {
3320               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3321                                             &algorithm, variant);
3322               return expand_unop (mode, neg_optab, temp, target, 0);
3323             }
3324           goto skip_synth;
3325         }
3326
3327       /* Exclude cost of op0 from max_cost to match the cost
3328          calculation of the synth_mult.  */
3329       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3330       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3331         return expand_mult_const (mode, op0, coeff, target,
3332                                   &algorithm, variant);
3333     }
3334  skip_synth:
3335
3336   /* Expand x*2.0 as x+x.  */
3337   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3338       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3339     {
3340       op0 = force_reg (GET_MODE (op0), op0);
3341       return expand_binop (mode, add_optab, op0, op0,
3342                            target, unsignedp, OPTAB_LIB_WIDEN);
3343     }
3344
3345   /* This used to use umul_optab if unsigned, but for non-widening multiply
3346      there is no difference between signed and unsigned.  */
3347   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3348                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3349   gcc_assert (op0);
3350   return op0;
3351 }
3352
3353 /* Return a cost estimate for multiplying a register by the given
3354    COEFFicient in the given MODE and SPEED.  */
3355
3356 int
3357 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3358 {
3359   int max_cost;
3360   struct algorithm algorithm;
3361   enum mult_variant variant;
3362
3363   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3364   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3365                            mode, speed);
3366   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3367     return algorithm.cost.cost;
3368   else
3369     return max_cost;
3370 }
3371
3372 /* Perform a widening multiplication and return an rtx for the result.
3373    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3374    TARGET is a suggestion for where to store the result (an rtx).
3375    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3376    or smul_widen_optab.
3377
3378    We check specially for a constant integer as OP1, comparing the
3379    cost of a widening multiply against the cost of a sequence of shifts
3380    and adds.  */
3381
3382 rtx
3383 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3384                       int unsignedp, optab this_optab)
3385 {
3386   bool speed = optimize_insn_for_speed_p ();
3387   rtx cop1;
3388
3389   if (CONST_INT_P (op1)
3390       && GET_MODE (op0) != VOIDmode
3391       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3392                                 this_optab == umul_widen_optab))
3393       && CONST_INT_P (cop1)
3394       && (INTVAL (cop1) >= 0
3395           || HWI_COMPUTABLE_MODE_P (mode)))
3396     {
3397       HOST_WIDE_INT coeff = INTVAL (cop1);
3398       int max_cost;
3399       enum mult_variant variant;
3400       struct algorithm algorithm;
3401
3402       if (coeff == 0)
3403         return CONST0_RTX (mode);
3404
3405       /* Special case powers of two.  */
3406       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3407         {
3408           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3409           return expand_shift (LSHIFT_EXPR, mode, op0,
3410                                floor_log2 (coeff), target, unsignedp);
3411         }
3412
3413       /* Exclude cost of op0 from max_cost to match the cost
3414          calculation of the synth_mult.  */
3415       max_cost = mul_widen_cost (speed, mode);
3416       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3417                                max_cost))
3418         {
3419           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3420           return expand_mult_const (mode, op0, coeff, target,
3421                                     &algorithm, variant);
3422         }
3423     }
3424   return expand_binop (mode, this_optab, op0, op1, target,
3425                        unsignedp, OPTAB_LIB_WIDEN);
3426 }
3427 \f
3428 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3429    replace division by D, and put the least significant N bits of the result
3430    in *MULTIPLIER_PTR and return the most significant bit.
3431
3432    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3433    needed precision is in PRECISION (should be <= N).
3434
3435    PRECISION should be as small as possible so this function can choose
3436    multiplier more freely.
3437
3438    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3439    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3440
3441    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3442    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3443
3444 unsigned HOST_WIDE_INT
3445 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3446                    unsigned HOST_WIDE_INT *multiplier_ptr,
3447                    int *post_shift_ptr, int *lgup_ptr)
3448 {
3449   int lgup, post_shift;
3450   int pow, pow2;
3451
3452   /* lgup = ceil(log2(divisor)); */
3453   lgup = ceil_log2 (d);
3454
3455   gcc_assert (lgup <= n);
3456
3457   pow = n + lgup;
3458   pow2 = n + lgup - precision;
3459
3460   /* mlow = 2^(N + lgup)/d */
3461   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3462   wide_int mlow = wi::udiv_trunc (val, d);
3463
3464   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3465   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3466   wide_int mhigh = wi::udiv_trunc (val, d);
3467
3468   /* If precision == N, then mlow, mhigh exceed 2^N
3469      (but they do not exceed 2^(N+1)).  */
3470
3471   /* Reduce to lowest terms.  */
3472   for (post_shift = lgup; post_shift > 0; post_shift--)
3473     {
3474       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3475                                                        HOST_BITS_PER_WIDE_INT);
3476       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3477                                                        HOST_BITS_PER_WIDE_INT);
3478       if (ml_lo >= mh_lo)
3479         break;
3480
3481       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3482       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3483     }
3484
3485   *post_shift_ptr = post_shift;
3486   *lgup_ptr = lgup;
3487   if (n < HOST_BITS_PER_WIDE_INT)
3488     {
3489       unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3490       *multiplier_ptr = mhigh.to_uhwi () & mask;
3491       return mhigh.to_uhwi () >= mask;
3492     }
3493   else
3494     {
3495       *multiplier_ptr = mhigh.to_uhwi ();
3496       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3497     }
3498 }
3499
3500 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3501    congruent to 1 (mod 2**N).  */
3502
3503 static unsigned HOST_WIDE_INT
3504 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3505 {
3506   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3507
3508   /* The algorithm notes that the choice y = x satisfies
3509      x*y == 1 mod 2^3, since x is assumed odd.
3510      Each iteration doubles the number of bits of significance in y.  */
3511
3512   unsigned HOST_WIDE_INT mask;
3513   unsigned HOST_WIDE_INT y = x;
3514   int nbit = 3;
3515
3516   mask = (n == HOST_BITS_PER_WIDE_INT
3517           ? HOST_WIDE_INT_M1U
3518           : (HOST_WIDE_INT_1U << n) - 1);
3519
3520   while (nbit < n)
3521     {
3522       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3523       nbit *= 2;
3524     }
3525   return y;
3526 }
3527
3528 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3529    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3530    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3531    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3532    become signed.
3533
3534    The result is put in TARGET if that is convenient.
3535
3536    MODE is the mode of operation.  */
3537
3538 rtx
3539 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3540                              rtx op1, rtx target, int unsignedp)
3541 {
3542   rtx tem;
3543   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3544
3545   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3546                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3547   tem = expand_and (mode, tem, op1, NULL_RTX);
3548   adj_operand
3549     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3550                      adj_operand);
3551
3552   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3553                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3554   tem = expand_and (mode, tem, op0, NULL_RTX);
3555   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3556                           target);
3557
3558   return target;
3559 }
3560
3561 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3562
3563 static rtx
3564 extract_high_half (machine_mode mode, rtx op)
3565 {
3566   machine_mode wider_mode;
3567
3568   if (mode == word_mode)
3569     return gen_highpart (mode, op);
3570
3571   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3572
3573   wider_mode = GET_MODE_WIDER_MODE (mode);
3574   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3575                      GET_MODE_BITSIZE (mode), 0, 1);
3576   return convert_modes (mode, wider_mode, op, 0);
3577 }
3578
3579 /* Like expmed_mult_highpart, but only consider using a multiplication
3580    optab.  OP1 is an rtx for the constant operand.  */
3581
3582 static rtx
3583 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3584                             rtx target, int unsignedp, int max_cost)
3585 {
3586   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3587   machine_mode wider_mode;
3588   optab moptab;
3589   rtx tem;
3590   int size;
3591   bool speed = optimize_insn_for_speed_p ();
3592
3593   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3594
3595   wider_mode = GET_MODE_WIDER_MODE (mode);
3596   size = GET_MODE_BITSIZE (mode);
3597
3598   /* Firstly, try using a multiplication insn that only generates the needed
3599      high part of the product, and in the sign flavor of unsignedp.  */
3600   if (mul_highpart_cost (speed, mode) < max_cost)
3601     {
3602       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3603       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3604                           unsignedp, OPTAB_DIRECT);
3605       if (tem)
3606         return tem;
3607     }
3608
3609   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3610      Need to adjust the result after the multiplication.  */
3611   if (size - 1 < BITS_PER_WORD
3612       && (mul_highpart_cost (speed, mode)
3613           + 2 * shift_cost (speed, mode, size-1)
3614           + 4 * add_cost (speed, mode) < max_cost))
3615     {
3616       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3617       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3618                           unsignedp, OPTAB_DIRECT);
3619       if (tem)
3620         /* We used the wrong signedness.  Adjust the result.  */
3621         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3622                                             tem, unsignedp);
3623     }
3624
3625   /* Try widening multiplication.  */
3626   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3627   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3628       && mul_widen_cost (speed, wider_mode) < max_cost)
3629     {
3630       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3631                           unsignedp, OPTAB_WIDEN);
3632       if (tem)
3633         return extract_high_half (mode, tem);
3634     }
3635
3636   /* Try widening the mode and perform a non-widening multiplication.  */
3637   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3638       && size - 1 < BITS_PER_WORD
3639       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3640           < max_cost))
3641     {
3642       rtx_insn *insns;
3643       rtx wop0, wop1;
3644
3645       /* We need to widen the operands, for example to ensure the
3646          constant multiplier is correctly sign or zero extended.
3647          Use a sequence to clean-up any instructions emitted by
3648          the conversions if things don't work out.  */
3649       start_sequence ();
3650       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3651       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3652       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3653                           unsignedp, OPTAB_WIDEN);
3654       insns = get_insns ();
3655       end_sequence ();
3656
3657       if (tem)
3658         {
3659           emit_insn (insns);
3660           return extract_high_half (mode, tem);
3661         }
3662     }
3663
3664   /* Try widening multiplication of opposite signedness, and adjust.  */
3665   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3666   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3667       && size - 1 < BITS_PER_WORD
3668       && (mul_widen_cost (speed, wider_mode)
3669           + 2 * shift_cost (speed, mode, size-1)
3670           + 4 * add_cost (speed, mode) < max_cost))
3671     {
3672       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3673                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3674       if (tem != 0)
3675         {
3676           tem = extract_high_half (mode, tem);
3677           /* We used the wrong signedness.  Adjust the result.  */
3678           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3679                                               target, unsignedp);
3680         }
3681     }
3682
3683   return 0;
3684 }
3685
3686 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3687    putting the high half of the result in TARGET if that is convenient,
3688    and return where the result is.  If the operation can not be performed,
3689    0 is returned.
3690
3691    MODE is the mode of operation and result.
3692
3693    UNSIGNEDP nonzero means unsigned multiply.
3694
3695    MAX_COST is the total allowed cost for the expanded RTL.  */
3696
3697 static rtx
3698 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3699                       rtx target, int unsignedp, int max_cost)
3700 {
3701   machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3702   unsigned HOST_WIDE_INT cnst1;
3703   int extra_cost;
3704   bool sign_adjust = false;
3705   enum mult_variant variant;
3706   struct algorithm alg;
3707   rtx tem;
3708   bool speed = optimize_insn_for_speed_p ();
3709
3710   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3711   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3712   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3713
3714   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3715
3716   /* We can't optimize modes wider than BITS_PER_WORD.
3717      ??? We might be able to perform double-word arithmetic if
3718      mode == word_mode, however all the cost calculations in
3719      synth_mult etc. assume single-word operations.  */
3720   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3721     return expmed_mult_highpart_optab (mode, op0, op1, target,
3722                                        unsignedp, max_cost);
3723
3724   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3725
3726   /* Check whether we try to multiply by a negative constant.  */
3727   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3728     {
3729       sign_adjust = true;
3730       extra_cost += add_cost (speed, mode);
3731     }
3732
3733   /* See whether shift/add multiplication is cheap enough.  */
3734   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3735                            max_cost - extra_cost))
3736     {
3737       /* See whether the specialized multiplication optabs are
3738          cheaper than the shift/add version.  */
3739       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3740                                         alg.cost.cost + extra_cost);
3741       if (tem)
3742         return tem;
3743
3744       tem = convert_to_mode (wider_mode, op0, unsignedp);
3745       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3746       tem = extract_high_half (mode, tem);
3747
3748       /* Adjust result for signedness.  */
3749       if (sign_adjust)
3750         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3751
3752       return tem;
3753     }
3754   return expmed_mult_highpart_optab (mode, op0, op1, target,
3755                                      unsignedp, max_cost);
3756 }
3757
3758
3759 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3760
3761 static rtx
3762 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3763 {
3764   rtx result, temp, shift;
3765   rtx_code_label *label;
3766   int logd;
3767   int prec = GET_MODE_PRECISION (mode);
3768
3769   logd = floor_log2 (d);
3770   result = gen_reg_rtx (mode);
3771
3772   /* Avoid conditional branches when they're expensive.  */
3773   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3774       && optimize_insn_for_speed_p ())
3775     {
3776       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3777                                       mode, 0, -1);
3778       if (signmask)
3779         {
3780           HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3781           signmask = force_reg (mode, signmask);
3782           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3783
3784           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3785              which instruction sequence to use.  If logical right shifts
3786              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3787              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3788
3789           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3790           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3791               || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3792                   > COSTS_N_INSNS (2)))
3793             {
3794               temp = expand_binop (mode, xor_optab, op0, signmask,
3795                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3796               temp = expand_binop (mode, sub_optab, temp, signmask,
3797                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3798               temp = expand_binop (mode, and_optab, temp,
3799                                    gen_int_mode (masklow, mode),
3800                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3801               temp = expand_binop (mode, xor_optab, temp, signmask,
3802                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3803               temp = expand_binop (mode, sub_optab, temp, signmask,
3804                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3805             }
3806           else
3807             {
3808               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3809                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3810               signmask = force_reg (mode, signmask);
3811
3812               temp = expand_binop (mode, add_optab, op0, signmask,
3813                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3814               temp = expand_binop (mode, and_optab, temp,
3815                                    gen_int_mode (masklow, mode),
3816                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3817               temp = expand_binop (mode, sub_optab, temp, signmask,
3818                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3819             }
3820           return temp;
3821         }
3822     }
3823
3824   /* Mask contains the mode's signbit and the significant bits of the
3825      modulus.  By including the signbit in the operation, many targets
3826      can avoid an explicit compare operation in the following comparison
3827      against zero.  */
3828   wide_int mask = wi::mask (logd, false, prec);
3829   mask = wi::set_bit (mask, prec - 1);
3830
3831   temp = expand_binop (mode, and_optab, op0,
3832                        immed_wide_int_const (mask, mode),
3833                        result, 1, OPTAB_LIB_WIDEN);
3834   if (temp != result)
3835     emit_move_insn (result, temp);
3836
3837   label = gen_label_rtx ();
3838   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3839
3840   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3841                        0, OPTAB_LIB_WIDEN);
3842
3843   mask = wi::mask (logd, true, prec);
3844   temp = expand_binop (mode, ior_optab, temp,
3845                        immed_wide_int_const (mask, mode),
3846                        result, 1, OPTAB_LIB_WIDEN);
3847   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3848                        0, OPTAB_LIB_WIDEN);
3849   if (temp != result)
3850     emit_move_insn (result, temp);
3851   emit_label (label);
3852   return result;
3853 }
3854
3855 /* Expand signed division of OP0 by a power of two D in mode MODE.
3856    This routine is only called for positive values of D.  */
3857
3858 static rtx
3859 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3860 {
3861   rtx temp;
3862   rtx_code_label *label;
3863   int logd;
3864
3865   logd = floor_log2 (d);
3866
3867   if (d == 2
3868       && BRANCH_COST (optimize_insn_for_speed_p (),
3869                       false) >= 1)
3870     {
3871       temp = gen_reg_rtx (mode);
3872       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3873       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3874                            0, OPTAB_LIB_WIDEN);
3875       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3876     }
3877
3878   if (HAVE_conditional_move
3879       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3880     {
3881       rtx temp2;
3882
3883       start_sequence ();
3884       temp2 = copy_to_mode_reg (mode, op0);
3885       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3886                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3887       temp = force_reg (mode, temp);
3888
3889       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3890       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3891                                      mode, temp, temp2, mode, 0);
3892       if (temp2)
3893         {
3894           rtx_insn *seq = get_insns ();
3895           end_sequence ();
3896           emit_insn (seq);
3897           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3898         }
3899       end_sequence ();
3900     }
3901
3902   if (BRANCH_COST (optimize_insn_for_speed_p (),
3903                    false) >= 2)
3904     {
3905       int ushift = GET_MODE_BITSIZE (mode) - logd;
3906
3907       temp = gen_reg_rtx (mode);
3908       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3909       if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3910           || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3911              > COSTS_N_INSNS (1))
3912         temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3913                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3914       else
3915         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3916                              ushift, NULL_RTX, 1);
3917       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3918                            0, OPTAB_LIB_WIDEN);
3919       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3920     }
3921
3922   label = gen_label_rtx ();
3923   temp = copy_to_mode_reg (mode, op0);
3924   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3925   expand_inc (temp, gen_int_mode (d - 1, mode));
3926   emit_label (label);
3927   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3928 }
3929 \f
3930 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3931    if that is convenient, and returning where the result is.
3932    You may request either the quotient or the remainder as the result;
3933    specify REM_FLAG nonzero to get the remainder.
3934
3935    CODE is the expression code for which kind of division this is;
3936    it controls how rounding is done.  MODE is the machine mode to use.
3937    UNSIGNEDP nonzero means do unsigned division.  */
3938
3939 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3940    and then correct it by or'ing in missing high bits
3941    if result of ANDI is nonzero.
3942    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3943    This could optimize to a bfexts instruction.
3944    But C doesn't use these operations, so their optimizations are
3945    left for later.  */
3946 /* ??? For modulo, we don't actually need the highpart of the first product,
3947    the low part will do nicely.  And for small divisors, the second multiply
3948    can also be a low-part only multiply or even be completely left out.
3949    E.g. to calculate the remainder of a division by 3 with a 32 bit
3950    multiply, multiply with 0x55555556 and extract the upper two bits;
3951    the result is exact for inputs up to 0x1fffffff.
3952    The input range can be reduced by using cross-sum rules.
3953    For odd divisors >= 3, the following table gives right shift counts
3954    so that if a number is shifted by an integer multiple of the given
3955    amount, the remainder stays the same:
3956    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3957    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3958    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3959    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3960    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3961
3962    Cross-sum rules for even numbers can be derived by leaving as many bits
3963    to the right alone as the divisor has zeros to the right.
3964    E.g. if x is an unsigned 32 bit number:
3965    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3966    */
3967
3968 rtx
3969 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3970                rtx op0, rtx op1, rtx target, int unsignedp)
3971 {
3972   machine_mode compute_mode;
3973   rtx tquotient;
3974   rtx quotient = 0, remainder = 0;
3975   rtx_insn *last;
3976   int size;
3977   rtx_insn *insn;
3978   optab optab1, optab2;
3979   int op1_is_constant, op1_is_pow2 = 0;
3980   int max_cost, extra_cost;
3981   static HOST_WIDE_INT last_div_const = 0;
3982   bool speed = optimize_insn_for_speed_p ();
3983
3984   op1_is_constant = CONST_INT_P (op1);
3985   if (op1_is_constant)
3986     {
3987       unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3988       if (unsignedp)
3989         ext_op1 &= GET_MODE_MASK (mode);
3990       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3991                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3992     }
3993
3994   /*
3995      This is the structure of expand_divmod:
3996
3997      First comes code to fix up the operands so we can perform the operations
3998      correctly and efficiently.
3999
4000      Second comes a switch statement with code specific for each rounding mode.
4001      For some special operands this code emits all RTL for the desired
4002      operation, for other cases, it generates only a quotient and stores it in
4003      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4004      to indicate that it has not done anything.
4005
4006      Last comes code that finishes the operation.  If QUOTIENT is set and
4007      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4008      QUOTIENT is not set, it is computed using trunc rounding.
4009
4010      We try to generate special code for division and remainder when OP1 is a
4011      constant.  If |OP1| = 2**n we can use shifts and some other fast
4012      operations.  For other values of OP1, we compute a carefully selected
4013      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4014      by m.
4015
4016      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4017      half of the product.  Different strategies for generating the product are
4018      implemented in expmed_mult_highpart.
4019
4020      If what we actually want is the remainder, we generate that by another
4021      by-constant multiplication and a subtraction.  */
4022
4023   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4024      code below will malfunction if we are, so check here and handle
4025      the special case if so.  */
4026   if (op1 == const1_rtx)
4027     return rem_flag ? const0_rtx : op0;
4028
4029     /* When dividing by -1, we could get an overflow.
4030      negv_optab can handle overflows.  */
4031   if (! unsignedp && op1 == constm1_rtx)
4032     {
4033       if (rem_flag)
4034         return const0_rtx;
4035       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4036                           ? negv_optab : neg_optab, op0, target, 0);
4037     }
4038
4039   if (target
4040       /* Don't use the function value register as a target
4041          since we have to read it as well as write it,
4042          and function-inlining gets confused by this.  */
4043       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4044           /* Don't clobber an operand while doing a multi-step calculation.  */
4045           || ((rem_flag || op1_is_constant)
4046               && (reg_mentioned_p (target, op0)
4047                   || (MEM_P (op0) && MEM_P (target))))
4048           || reg_mentioned_p (target, op1)
4049           || (MEM_P (op1) && MEM_P (target))))
4050     target = 0;
4051
4052   /* Get the mode in which to perform this computation.  Normally it will
4053      be MODE, but sometimes we can't do the desired operation in MODE.
4054      If so, pick a wider mode in which we can do the operation.  Convert
4055      to that mode at the start to avoid repeated conversions.
4056
4057      First see what operations we need.  These depend on the expression
4058      we are evaluating.  (We assume that divxx3 insns exist under the
4059      same conditions that modxx3 insns and that these insns don't normally
4060      fail.  If these assumptions are not correct, we may generate less
4061      efficient code in some cases.)
4062
4063      Then see if we find a mode in which we can open-code that operation
4064      (either a division, modulus, or shift).  Finally, check for the smallest
4065      mode for which we can do the operation with a library call.  */
4066
4067   /* We might want to refine this now that we have division-by-constant
4068      optimization.  Since expmed_mult_highpart tries so many variants, it is
4069      not straightforward to generalize this.  Maybe we should make an array
4070      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4071
4072   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
4073             ? (unsignedp ? lshr_optab : ashr_optab)
4074             : (unsignedp ? udiv_optab : sdiv_optab));
4075   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
4076             ? optab1
4077             : (unsignedp ? udivmod_optab : sdivmod_optab));
4078
4079   for (compute_mode = mode; compute_mode != VOIDmode;
4080        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4081     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4082         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4083       break;
4084
4085   if (compute_mode == VOIDmode)
4086     for (compute_mode = mode; compute_mode != VOIDmode;
4087          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4088       if (optab_libfunc (optab1, compute_mode)
4089           || optab_libfunc (optab2, compute_mode))
4090         break;
4091
4092   /* If we still couldn't find a mode, use MODE, but expand_binop will
4093      probably die.  */
4094   if (compute_mode == VOIDmode)
4095     compute_mode = mode;
4096
4097   if (target && GET_MODE (target) == compute_mode)
4098     tquotient = target;
4099   else
4100     tquotient = gen_reg_rtx (compute_mode);
4101
4102   size = GET_MODE_BITSIZE (compute_mode);
4103 #if 0
4104   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4105      (mode), and thereby get better code when OP1 is a constant.  Do that
4106      later.  It will require going over all usages of SIZE below.  */
4107   size = GET_MODE_BITSIZE (mode);
4108 #endif
4109
4110   /* Only deduct something for a REM if the last divide done was
4111      for a different constant.   Then set the constant of the last
4112      divide.  */
4113   max_cost = (unsignedp
4114               ? udiv_cost (speed, compute_mode)
4115               : sdiv_cost (speed, compute_mode));
4116   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4117                      && INTVAL (op1) == last_div_const))
4118     max_cost -= (mul_cost (speed, compute_mode)
4119                  + add_cost (speed, compute_mode));
4120
4121   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4122
4123   /* Now convert to the best mode to use.  */
4124   if (compute_mode != mode)
4125     {
4126       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4127       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4128
4129       /* convert_modes may have placed op1 into a register, so we
4130          must recompute the following.  */
4131       op1_is_constant = CONST_INT_P (op1);
4132       op1_is_pow2 = (op1_is_constant
4133                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4134                           || (! unsignedp
4135                               && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4136     }
4137
4138   /* If one of the operands is a volatile MEM, copy it into a register.  */
4139
4140   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4141     op0 = force_reg (compute_mode, op0);
4142   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4143     op1 = force_reg (compute_mode, op1);
4144
4145   /* If we need the remainder or if OP1 is constant, we need to
4146      put OP0 in a register in case it has any queued subexpressions.  */
4147   if (rem_flag || op1_is_constant)
4148     op0 = force_reg (compute_mode, op0);
4149
4150   last = get_last_insn ();
4151
4152   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4153   if (unsignedp)
4154     {
4155       if (code == FLOOR_DIV_EXPR)
4156         code = TRUNC_DIV_EXPR;
4157       if (code == FLOOR_MOD_EXPR)
4158         code = TRUNC_MOD_EXPR;
4159       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4160         code = TRUNC_DIV_EXPR;
4161     }
4162
4163   if (op1 != const0_rtx)
4164     switch (code)
4165       {
4166       case TRUNC_MOD_EXPR:
4167       case TRUNC_DIV_EXPR:
4168         if (op1_is_constant)
4169           {
4170             if (unsignedp)
4171               {
4172                 unsigned HOST_WIDE_INT mh, ml;
4173                 int pre_shift, post_shift;
4174                 int dummy;
4175                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4176                                             & GET_MODE_MASK (compute_mode));
4177
4178                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4179                   {
4180                     pre_shift = floor_log2 (d);
4181                     if (rem_flag)
4182                       {
4183                         unsigned HOST_WIDE_INT mask
4184                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4185                         remainder
4186                           = expand_binop (compute_mode, and_optab, op0,
4187                                           gen_int_mode (mask, compute_mode),
4188                                           remainder, 1,
4189                                           OPTAB_LIB_WIDEN);
4190                         if (remainder)
4191                           return gen_lowpart (mode, remainder);
4192                       }
4193                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4194                                              pre_shift, tquotient, 1);
4195                   }
4196                 else if (size <= HOST_BITS_PER_WIDE_INT)
4197                   {
4198                     if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4199                       {
4200                         /* Most significant bit of divisor is set; emit an scc
4201                            insn.  */
4202                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4203                                                           compute_mode, 1, 1);
4204                       }
4205                     else
4206                       {
4207                         /* Find a suitable multiplier and right shift count
4208                            instead of multiplying with D.  */
4209
4210                         mh = choose_multiplier (d, size, size,
4211                                                 &ml, &post_shift, &dummy);
4212
4213                         /* If the suggested multiplier is more than SIZE bits,
4214                            we can do better for even divisors, using an
4215                            initial right shift.  */
4216                         if (mh != 0 && (d & 1) == 0)
4217                           {
4218                             pre_shift = floor_log2 (d & -d);
4219                             mh = choose_multiplier (d >> pre_shift, size,
4220                                                     size - pre_shift,
4221                                                     &ml, &post_shift, &dummy);
4222                             gcc_assert (!mh);
4223                           }
4224                         else
4225                           pre_shift = 0;
4226
4227                         if (mh != 0)
4228                           {
4229                             rtx t1, t2, t3, t4;
4230
4231                             if (post_shift - 1 >= BITS_PER_WORD)
4232                               goto fail1;
4233
4234                             extra_cost
4235                               = (shift_cost (speed, compute_mode, post_shift - 1)
4236                                  + shift_cost (speed, compute_mode, 1)
4237                                  + 2 * add_cost (speed, compute_mode));
4238                             t1 = expmed_mult_highpart
4239                               (compute_mode, op0,
4240                                gen_int_mode (ml, compute_mode),
4241                                NULL_RTX, 1, max_cost - extra_cost);
4242                             if (t1 == 0)
4243                               goto fail1;
4244                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4245                                                                op0, t1),
4246                                                 NULL_RTX);
4247                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4248                                                t2, 1, NULL_RTX, 1);
4249                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4250                                                               t1, t3),
4251                                                 NULL_RTX);
4252                             quotient = expand_shift
4253                               (RSHIFT_EXPR, compute_mode, t4,
4254                                post_shift - 1, tquotient, 1);
4255                           }
4256                         else
4257                           {
4258                             rtx t1, t2;
4259
4260                             if (pre_shift >= BITS_PER_WORD
4261                                 || post_shift >= BITS_PER_WORD)
4262                               goto fail1;
4263
4264                             t1 = expand_shift
4265                               (RSHIFT_EXPR, compute_mode, op0,
4266                                pre_shift, NULL_RTX, 1);
4267                             extra_cost
4268                               = (shift_cost (speed, compute_mode, pre_shift)
4269                                  + shift_cost (speed, compute_mode, post_shift));
4270                             t2 = expmed_mult_highpart
4271                               (compute_mode, t1,
4272                                gen_int_mode (ml, compute_mode),
4273                                NULL_RTX, 1, max_cost - extra_cost);
4274                             if (t2 == 0)
4275                               goto fail1;
4276                             quotient = expand_shift
4277                               (RSHIFT_EXPR, compute_mode, t2,
4278                                post_shift, tquotient, 1);
4279                           }
4280                       }
4281                   }
4282                 else            /* Too wide mode to use tricky code */
4283                   break;
4284
4285                 insn = get_last_insn ();
4286                 if (insn != last)
4287                   set_dst_reg_note (insn, REG_EQUAL,
4288                                     gen_rtx_UDIV (compute_mode, op0, op1),
4289                                     quotient);
4290               }
4291             else                /* TRUNC_DIV, signed */
4292               {
4293                 unsigned HOST_WIDE_INT ml;
4294                 int lgup, post_shift;
4295                 rtx mlr;
4296                 HOST_WIDE_INT d = INTVAL (op1);
4297                 unsigned HOST_WIDE_INT abs_d;
4298
4299                 /* Since d might be INT_MIN, we have to cast to
4300                    unsigned HOST_WIDE_INT before negating to avoid
4301                    undefined signed overflow.  */
4302                 abs_d = (d >= 0
4303                          ? (unsigned HOST_WIDE_INT) d
4304                          : - (unsigned HOST_WIDE_INT) d);
4305
4306                 /* n rem d = n rem -d */
4307                 if (rem_flag && d < 0)
4308                   {
4309                     d = abs_d;
4310                     op1 = gen_int_mode (abs_d, compute_mode);
4311                   }
4312
4313                 if (d == 1)
4314                   quotient = op0;
4315                 else if (d == -1)
4316                   quotient = expand_unop (compute_mode, neg_optab, op0,
4317                                           tquotient, 0);
4318                 else if (HOST_BITS_PER_WIDE_INT >= size
4319                          && abs_d == HOST_WIDE_INT_1U << (size - 1))
4320                   {
4321                     /* This case is not handled correctly below.  */
4322                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4323                                                 compute_mode, 1, 1);
4324                     if (quotient == 0)
4325                       goto fail1;
4326                   }
4327                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4328                          && (rem_flag
4329                              ? smod_pow2_cheap (speed, compute_mode)
4330                              : sdiv_pow2_cheap (speed, compute_mode))
4331                          /* We assume that cheap metric is true if the
4332                             optab has an expander for this mode.  */
4333                          && ((optab_handler ((rem_flag ? smod_optab
4334                                               : sdiv_optab),
4335                                              compute_mode)
4336                               != CODE_FOR_nothing)
4337                              || (optab_handler (sdivmod_optab,
4338                                                 compute_mode)
4339                                  != CODE_FOR_nothing)))
4340                   ;
4341                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4342                   {
4343                     if (rem_flag)
4344                       {
4345                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4346                         if (remainder)
4347                           return gen_lowpart (mode, remainder);
4348                       }
4349
4350                     if (sdiv_pow2_cheap (speed, compute_mode)
4351                         && ((optab_handler (sdiv_optab, compute_mode)
4352                              != CODE_FOR_nothing)
4353                             || (optab_handler (sdivmod_optab, compute_mode)
4354                                 != CODE_FOR_nothing)))
4355                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4356                                                 compute_mode, op0,
4357                                                 gen_int_mode (abs_d,
4358                                                               compute_mode),
4359                                                 NULL_RTX, 0);
4360                     else
4361                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4362
4363                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4364                        negate the quotient.  */
4365                     if (d < 0)
4366                       {
4367                         insn = get_last_insn ();
4368                         if (insn != last
4369                             && abs_d < (HOST_WIDE_INT_1U
4370                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4371                           set_dst_reg_note (insn, REG_EQUAL,
4372                                             gen_rtx_DIV (compute_mode, op0,
4373                                                          gen_int_mode
4374                                                            (abs_d,
4375                                                             compute_mode)),
4376                                             quotient);
4377
4378                         quotient = expand_unop (compute_mode, neg_optab,
4379                                                 quotient, quotient, 0);
4380                       }
4381                   }
4382                 else if (size <= HOST_BITS_PER_WIDE_INT)
4383                   {
4384                     choose_multiplier (abs_d, size, size - 1,
4385                                        &ml, &post_shift, &lgup);
4386                     if (ml < HOST_WIDE_INT_1U << (size - 1))
4387                       {
4388                         rtx t1, t2, t3;
4389
4390                         if (post_shift >= BITS_PER_WORD
4391                             || size - 1 >= BITS_PER_WORD)
4392                           goto fail1;
4393
4394                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4395                                       + shift_cost (speed, compute_mode, size - 1)
4396                                       + add_cost (speed, compute_mode));
4397                         t1 = expmed_mult_highpart
4398                           (compute_mode, op0, gen_int_mode (ml, compute_mode),
4399                            NULL_RTX, 0, max_cost - extra_cost);
4400                         if (t1 == 0)
4401                           goto fail1;
4402                         t2 = expand_shift
4403                           (RSHIFT_EXPR, compute_mode, t1,
4404                            post_shift, NULL_RTX, 0);
4405                         t3 = expand_shift
4406                           (RSHIFT_EXPR, compute_mode, op0,
4407                            size - 1, NULL_RTX, 0);
4408                         if (d < 0)
4409                           quotient
4410                             = force_operand (gen_rtx_MINUS (compute_mode,
4411                                                             t3, t2),
4412                                              tquotient);
4413                         else
4414                           quotient
4415                             = force_operand (gen_rtx_MINUS (compute_mode,
4416                                                             t2, t3),
4417                                              tquotient);
4418                       }
4419                     else
4420                       {
4421                         rtx t1, t2, t3, t4;
4422
4423                         if (post_shift >= BITS_PER_WORD
4424                             || size - 1 >= BITS_PER_WORD)
4425                           goto fail1;
4426
4427                         ml |= HOST_WIDE_INT_M1U << (size - 1);
4428                         mlr = gen_int_mode (ml, compute_mode);
4429                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4430                                       + shift_cost (speed, compute_mode, size - 1)
4431                                       + 2 * add_cost (speed, compute_mode));
4432                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4433                                                    NULL_RTX, 0,
4434                                                    max_cost - extra_cost);
4435                         if (t1 == 0)
4436                           goto fail1;
4437                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4438                                                           t1, op0),
4439                                             NULL_RTX);
4440                         t3 = expand_shift
4441                           (RSHIFT_EXPR, compute_mode, t2,
4442                            post_shift, NULL_RTX, 0);
4443                         t4 = expand_shift
4444                           (RSHIFT_EXPR, compute_mode, op0,
4445                            size - 1, NULL_RTX, 0);
4446                         if (d < 0)
4447                           quotient
4448                             = force_operand (gen_rtx_MINUS (compute_mode,
4449                                                             t4, t3),
4450                                              tquotient);
4451                         else
4452                           quotient
4453                             = force_operand (gen_rtx_MINUS (compute_mode,
4454                                                             t3, t4),
4455                                              tquotient);
4456                       }
4457                   }
4458                 else            /* Too wide mode to use tricky code */
4459                   break;
4460
4461                 insn = get_last_insn ();
4462                 if (insn != last)
4463                   set_dst_reg_note (insn, REG_EQUAL,
4464                                     gen_rtx_DIV (compute_mode, op0, op1),
4465                                     quotient);
4466               }
4467             break;
4468           }
4469       fail1:
4470         delete_insns_since (last);
4471         break;
4472
4473       case FLOOR_DIV_EXPR:
4474       case FLOOR_MOD_EXPR:
4475       /* We will come here only for signed operations.  */
4476         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4477           {
4478             unsigned HOST_WIDE_INT mh, ml;
4479             int pre_shift, lgup, post_shift;
4480             HOST_WIDE_INT d = INTVAL (op1);
4481
4482             if (d > 0)
4483               {
4484                 /* We could just as easily deal with negative constants here,
4485                    but it does not seem worth the trouble for GCC 2.6.  */
4486                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4487                   {
4488                     pre_shift = floor_log2 (d);
4489                     if (rem_flag)
4490                       {
4491                         unsigned HOST_WIDE_INT mask
4492                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4493                         remainder = expand_binop
4494                           (compute_mode, and_optab, op0,
4495                            gen_int_mode (mask, compute_mode),
4496                            remainder, 0, OPTAB_LIB_WIDEN);
4497                         if (remainder)
4498                           return gen_lowpart (mode, remainder);
4499                       }
4500                     quotient = expand_shift
4501                       (RSHIFT_EXPR, compute_mode, op0,
4502                        pre_shift, tquotient, 0);
4503                   }
4504                 else
4505                   {
4506                     rtx t1, t2, t3, t4;
4507
4508                     mh = choose_multiplier (d, size, size - 1,
4509                                             &ml, &post_shift, &lgup);
4510                     gcc_assert (!mh);
4511
4512                     if (post_shift < BITS_PER_WORD
4513                         && size - 1 < BITS_PER_WORD)
4514                       {
4515                         t1 = expand_shift
4516                           (RSHIFT_EXPR, compute_mode, op0,
4517                            size - 1, NULL_RTX, 0);
4518                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4519                                            NULL_RTX, 0, OPTAB_WIDEN);
4520                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4521                                       + shift_cost (speed, compute_mode, size - 1)
4522                                       + 2 * add_cost (speed, compute_mode));
4523                         t3 = expmed_mult_highpart
4524                           (compute_mode, t2, gen_int_mode (ml, compute_mode),
4525                            NULL_RTX, 1, max_cost - extra_cost);
4526                         if (t3 != 0)
4527                           {
4528                             t4 = expand_shift
4529                               (RSHIFT_EXPR, compute_mode, t3,
4530                                post_shift, NULL_RTX, 1);
4531                             quotient = expand_binop (compute_mode, xor_optab,
4532                                                      t4, t1, tquotient, 0,
4533                                                      OPTAB_WIDEN);
4534                           }
4535                       }
4536                   }
4537               }
4538             else
4539               {
4540                 rtx nsign, t1, t2, t3, t4;
4541                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4542                                                   op0, constm1_rtx), NULL_RTX);
4543                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4544                                    0, OPTAB_WIDEN);
4545                 nsign = expand_shift
4546                   (RSHIFT_EXPR, compute_mode, t2,
4547                    size - 1, NULL_RTX, 0);
4548                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4549                                     NULL_RTX);
4550                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4551                                     NULL_RTX, 0);
4552                 if (t4)
4553                   {
4554                     rtx t5;
4555                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4556                                       NULL_RTX, 0);
4557                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4558                                                             t4, t5),
4559                                               tquotient);
4560                   }
4561               }
4562           }
4563
4564         if (quotient != 0)
4565           break;
4566         delete_insns_since (last);
4567
4568         /* Try using an instruction that produces both the quotient and
4569            remainder, using truncation.  We can easily compensate the quotient
4570            or remainder to get floor rounding, once we have the remainder.
4571            Notice that we compute also the final remainder value here,
4572            and return the result right away.  */
4573         if (target == 0 || GET_MODE (target) != compute_mode)
4574           target = gen_reg_rtx (compute_mode);
4575
4576         if (rem_flag)
4577           {
4578             remainder
4579               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4580             quotient = gen_reg_rtx (compute_mode);
4581           }
4582         else
4583           {
4584             quotient
4585               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4586             remainder = gen_reg_rtx (compute_mode);
4587           }
4588
4589         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4590                                  quotient, remainder, 0))
4591           {
4592             /* This could be computed with a branch-less sequence.
4593                Save that for later.  */
4594             rtx tem;
4595             rtx_code_label *label = gen_label_rtx ();
4596             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4597             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4598                                 NULL_RTX, 0, OPTAB_WIDEN);
4599             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4600             expand_dec (quotient, const1_rtx);
4601             expand_inc (remainder, op1);
4602             emit_label (label);
4603             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4604           }
4605
4606         /* No luck with division elimination or divmod.  Have to do it
4607            by conditionally adjusting op0 *and* the result.  */
4608         {
4609           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4610           rtx adjusted_op0;
4611           rtx tem;
4612
4613           quotient = gen_reg_rtx (compute_mode);
4614           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4615           label1 = gen_label_rtx ();
4616           label2 = gen_label_rtx ();
4617           label3 = gen_label_rtx ();
4618           label4 = gen_label_rtx ();
4619           label5 = gen_label_rtx ();
4620           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4621           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4622           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4623                               quotient, 0, OPTAB_LIB_WIDEN);
4624           if (tem != quotient)
4625             emit_move_insn (quotient, tem);
4626           emit_jump_insn (targetm.gen_jump (label5));
4627           emit_barrier ();
4628           emit_label (label1);
4629           expand_inc (adjusted_op0, const1_rtx);
4630           emit_jump_insn (targetm.gen_jump (label4));
4631           emit_barrier ();
4632           emit_label (label2);
4633           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4634           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4635                               quotient, 0, OPTAB_LIB_WIDEN);
4636           if (tem != quotient)
4637             emit_move_insn (quotient, tem);
4638           emit_jump_insn (targetm.gen_jump (label5));
4639           emit_barrier ();
4640           emit_label (label3);
4641           expand_dec (adjusted_op0, const1_rtx);
4642           emit_label (label4);
4643           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4644                               quotient, 0, OPTAB_LIB_WIDEN);
4645           if (tem != quotient)
4646             emit_move_insn (quotient, tem);
4647           expand_dec (quotient, const1_rtx);
4648           emit_label (label5);
4649         }
4650         break;
4651
4652       case CEIL_DIV_EXPR:
4653       case CEIL_MOD_EXPR:
4654         if (unsignedp)
4655           {
4656             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4657               {
4658                 rtx t1, t2, t3;
4659                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4660                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4661                                    floor_log2 (d), tquotient, 1);
4662                 t2 = expand_binop (compute_mode, and_optab, op0,
4663                                    gen_int_mode (d - 1, compute_mode),
4664                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4665                 t3 = gen_reg_rtx (compute_mode);
4666                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4667                                       compute_mode, 1, 1);
4668                 if (t3 == 0)
4669                   {
4670                     rtx_code_label *lab;
4671                     lab = gen_label_rtx ();
4672                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4673                     expand_inc (t1, const1_rtx);
4674                     emit_label (lab);
4675                     quotient = t1;
4676                   }
4677                 else
4678                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4679                                                           t1, t3),
4680                                             tquotient);
4681                 break;
4682               }
4683
4684             /* Try using an instruction that produces both the quotient and
4685                remainder, using truncation.  We can easily compensate the
4686                quotient or remainder to get ceiling rounding, once we have the
4687                remainder.  Notice that we compute also the final remainder
4688                value here, and return the result right away.  */
4689             if (target == 0 || GET_MODE (target) != compute_mode)
4690               target = gen_reg_rtx (compute_mode);
4691
4692             if (rem_flag)
4693               {
4694                 remainder = (REG_P (target)
4695                              ? target : gen_reg_rtx (compute_mode));
4696                 quotient = gen_reg_rtx (compute_mode);
4697               }
4698             else
4699               {
4700                 quotient = (REG_P (target)
4701                             ? target : gen_reg_rtx (compute_mode));
4702                 remainder = gen_reg_rtx (compute_mode);
4703               }
4704
4705             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4706                                      remainder, 1))
4707               {
4708                 /* This could be computed with a branch-less sequence.
4709                    Save that for later.  */
4710                 rtx_code_label *label = gen_label_rtx ();
4711                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4712                                  compute_mode, label);
4713                 expand_inc (quotient, const1_rtx);
4714                 expand_dec (remainder, op1);
4715                 emit_label (label);
4716                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4717               }
4718
4719             /* No luck with division elimination or divmod.  Have to do it
4720                by conditionally adjusting op0 *and* the result.  */
4721             {
4722               rtx_code_label *label1, *label2;
4723               rtx adjusted_op0, tem;
4724
4725               quotient = gen_reg_rtx (compute_mode);
4726               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4727               label1 = gen_label_rtx ();
4728               label2 = gen_label_rtx ();
4729               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4730                                compute_mode, label1);
4731               emit_move_insn  (quotient, const0_rtx);
4732               emit_jump_insn (targetm.gen_jump (label2));
4733               emit_barrier ();
4734               emit_label (label1);
4735               expand_dec (adjusted_op0, const1_rtx);
4736               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4737                                   quotient, 1, OPTAB_LIB_WIDEN);
4738               if (tem != quotient)
4739                 emit_move_insn (quotient, tem);
4740               expand_inc (quotient, const1_rtx);
4741               emit_label (label2);
4742             }
4743           }
4744         else /* signed */
4745           {
4746             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4747                 && INTVAL (op1) >= 0)
4748               {
4749                 /* This is extremely similar to the code for the unsigned case
4750                    above.  For 2.7 we should merge these variants, but for
4751                    2.6.1 I don't want to touch the code for unsigned since that
4752                    get used in C.  The signed case will only be used by other
4753                    languages (Ada).  */
4754
4755                 rtx t1, t2, t3;
4756                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4757                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4758                                    floor_log2 (d), tquotient, 0);
4759                 t2 = expand_binop (compute_mode, and_optab, op0,
4760                                    gen_int_mode (d - 1, compute_mode),
4761                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4762                 t3 = gen_reg_rtx (compute_mode);
4763                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4764                                       compute_mode, 1, 1);
4765                 if (t3 == 0)
4766                   {
4767                     rtx_code_label *lab;
4768                     lab = gen_label_rtx ();
4769                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4770                     expand_inc (t1, const1_rtx);
4771                     emit_label (lab);
4772                     quotient = t1;
4773                   }
4774                 else
4775                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4776                                                           t1, t3),
4777                                             tquotient);
4778                 break;
4779               }
4780
4781             /* Try using an instruction that produces both the quotient and
4782                remainder, using truncation.  We can easily compensate the
4783                quotient or remainder to get ceiling rounding, once we have the
4784                remainder.  Notice that we compute also the final remainder
4785                value here, and return the result right away.  */
4786             if (target == 0 || GET_MODE (target) != compute_mode)
4787               target = gen_reg_rtx (compute_mode);
4788             if (rem_flag)
4789               {
4790                 remainder= (REG_P (target)
4791                             ? target : gen_reg_rtx (compute_mode));
4792                 quotient = gen_reg_rtx (compute_mode);
4793               }
4794             else
4795               {
4796                 quotient = (REG_P (target)
4797                             ? target : gen_reg_rtx (compute_mode));
4798                 remainder = gen_reg_rtx (compute_mode);
4799               }
4800
4801             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4802                                      remainder, 0))
4803               {
4804                 /* This could be computed with a branch-less sequence.
4805                    Save that for later.  */
4806                 rtx tem;
4807                 rtx_code_label *label = gen_label_rtx ();
4808                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4809                                  compute_mode, label);
4810                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4811                                     NULL_RTX, 0, OPTAB_WIDEN);
4812                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4813                 expand_inc (quotient, const1_rtx);
4814                 expand_dec (remainder, op1);
4815                 emit_label (label);
4816                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4817               }
4818
4819             /* No luck with division elimination or divmod.  Have to do it
4820                by conditionally adjusting op0 *and* the result.  */
4821             {
4822               rtx_code_label *label1, *label2, *label3, *label4, *label5;
4823               rtx adjusted_op0;
4824               rtx tem;
4825
4826               quotient = gen_reg_rtx (compute_mode);
4827               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4828               label1 = gen_label_rtx ();
4829               label2 = gen_label_rtx ();
4830               label3 = gen_label_rtx ();
4831               label4 = gen_label_rtx ();
4832               label5 = gen_label_rtx ();
4833               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4834               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4835                                compute_mode, label1);
4836               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4837                                   quotient, 0, OPTAB_LIB_WIDEN);
4838               if (tem != quotient)
4839                 emit_move_insn (quotient, tem);
4840               emit_jump_insn (targetm.gen_jump (label5));
4841               emit_barrier ();
4842               emit_label (label1);
4843               expand_dec (adjusted_op0, const1_rtx);
4844               emit_jump_insn (targetm.gen_jump (label4));
4845               emit_barrier ();
4846               emit_label (label2);
4847               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4848                                compute_mode, label3);
4849               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4850                                   quotient, 0, OPTAB_LIB_WIDEN);
4851               if (tem != quotient)
4852                 emit_move_insn (quotient, tem);
4853               emit_jump_insn (targetm.gen_jump (label5));
4854               emit_barrier ();
4855               emit_label (label3);
4856               expand_inc (adjusted_op0, const1_rtx);
4857               emit_label (label4);
4858               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4859                                   quotient, 0, OPTAB_LIB_WIDEN);
4860               if (tem != quotient)
4861                 emit_move_insn (quotient, tem);
4862               expand_inc (quotient, const1_rtx);
4863               emit_label (label5);
4864             }
4865           }
4866         break;
4867
4868       case EXACT_DIV_EXPR:
4869         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4870           {
4871             HOST_WIDE_INT d = INTVAL (op1);
4872             unsigned HOST_WIDE_INT ml;
4873             int pre_shift;
4874             rtx t1;
4875
4876             pre_shift = floor_log2 (d & -d);
4877             ml = invert_mod2n (d >> pre_shift, size);
4878             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4879                                pre_shift, NULL_RTX, unsignedp);
4880             quotient = expand_mult (compute_mode, t1,
4881                                     gen_int_mode (ml, compute_mode),
4882                                     NULL_RTX, 1);
4883
4884             insn = get_last_insn ();
4885             set_dst_reg_note (insn, REG_EQUAL,
4886                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4887                                               compute_mode, op0, op1),
4888                               quotient);
4889           }
4890         break;
4891
4892       case ROUND_DIV_EXPR:
4893       case ROUND_MOD_EXPR:
4894         if (unsignedp)
4895           {
4896             rtx tem;
4897             rtx_code_label *label;
4898             label = gen_label_rtx ();
4899             quotient = gen_reg_rtx (compute_mode);
4900             remainder = gen_reg_rtx (compute_mode);
4901             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4902               {
4903                 rtx tem;
4904                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4905                                          quotient, 1, OPTAB_LIB_WIDEN);
4906                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4907                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4908                                           remainder, 1, OPTAB_LIB_WIDEN);
4909               }
4910             tem = plus_constant (compute_mode, op1, -1);
4911             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4912             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4913             expand_inc (quotient, const1_rtx);
4914             expand_dec (remainder, op1);
4915             emit_label (label);
4916           }
4917         else
4918           {
4919             rtx abs_rem, abs_op1, tem, mask;
4920             rtx_code_label *label;
4921             label = gen_label_rtx ();
4922             quotient = gen_reg_rtx (compute_mode);
4923             remainder = gen_reg_rtx (compute_mode);
4924             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4925               {
4926                 rtx tem;
4927                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4928                                          quotient, 0, OPTAB_LIB_WIDEN);
4929                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4930                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4931                                           remainder, 0, OPTAB_LIB_WIDEN);
4932               }
4933             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4934             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4935             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4936                                 1, NULL_RTX, 1);
4937             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4938             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4939                                 NULL_RTX, 0, OPTAB_WIDEN);
4940             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4941                                  size - 1, NULL_RTX, 0);
4942             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4943                                 NULL_RTX, 0, OPTAB_WIDEN);
4944             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4945                                 NULL_RTX, 0, OPTAB_WIDEN);
4946             expand_inc (quotient, tem);
4947             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4948                                 NULL_RTX, 0, OPTAB_WIDEN);
4949             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4950                                 NULL_RTX, 0, OPTAB_WIDEN);
4951             expand_dec (remainder, tem);
4952             emit_label (label);
4953           }
4954         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4955
4956       default:
4957         gcc_unreachable ();
4958       }
4959
4960   if (quotient == 0)
4961     {
4962       if (target && GET_MODE (target) != compute_mode)
4963         target = 0;
4964
4965       if (rem_flag)
4966         {
4967           /* Try to produce the remainder without producing the quotient.
4968              If we seem to have a divmod pattern that does not require widening,
4969              don't try widening here.  We should really have a WIDEN argument
4970              to expand_twoval_binop, since what we'd really like to do here is
4971              1) try a mod insn in compute_mode
4972              2) try a divmod insn in compute_mode
4973              3) try a div insn in compute_mode and multiply-subtract to get
4974                 remainder
4975              4) try the same things with widening allowed.  */
4976           remainder
4977             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4978                                  op0, op1, target,
4979                                  unsignedp,
4980                                  ((optab_handler (optab2, compute_mode)
4981                                    != CODE_FOR_nothing)
4982                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4983           if (remainder == 0)
4984             {
4985               /* No luck there.  Can we do remainder and divide at once
4986                  without a library call?  */
4987               remainder = gen_reg_rtx (compute_mode);
4988               if (! expand_twoval_binop ((unsignedp
4989                                           ? udivmod_optab
4990                                           : sdivmod_optab),
4991                                          op0, op1,
4992                                          NULL_RTX, remainder, unsignedp))
4993                 remainder = 0;
4994             }
4995
4996           if (remainder)
4997             return gen_lowpart (mode, remainder);
4998         }
4999
5000       /* Produce the quotient.  Try a quotient insn, but not a library call.
5001          If we have a divmod in this mode, use it in preference to widening
5002          the div (for this test we assume it will not fail). Note that optab2
5003          is set to the one of the two optabs that the call below will use.  */
5004       quotient
5005         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5006                              op0, op1, rem_flag ? NULL_RTX : target,
5007                              unsignedp,
5008                              ((optab_handler (optab2, compute_mode)
5009                                != CODE_FOR_nothing)
5010                               ? OPTAB_DIRECT : OPTAB_WIDEN));
5011
5012       if (quotient == 0)
5013         {
5014           /* No luck there.  Try a quotient-and-remainder insn,
5015              keeping the quotient alone.  */
5016           quotient = gen_reg_rtx (compute_mode);
5017           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5018                                      op0, op1,
5019                                      quotient, NULL_RTX, unsignedp))
5020             {
5021               quotient = 0;
5022               if (! rem_flag)
5023                 /* Still no luck.  If we are not computing the remainder,
5024                    use a library call for the quotient.  */
5025                 quotient = sign_expand_binop (compute_mode,
5026                                               udiv_optab, sdiv_optab,
5027                                               op0, op1, target,
5028                                               unsignedp, OPTAB_LIB_WIDEN);
5029             }
5030         }
5031     }
5032
5033   if (rem_flag)
5034     {
5035       if (target && GET_MODE (target) != compute_mode)
5036         target = 0;
5037
5038       if (quotient == 0)
5039         {
5040           /* No divide instruction either.  Use library for remainder.  */
5041           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5042                                          op0, op1, target,
5043                                          unsignedp, OPTAB_LIB_WIDEN);
5044           /* No remainder function.  Try a quotient-and-remainder
5045              function, keeping the remainder.  */
5046           if (!remainder)
5047             {
5048               remainder = gen_reg_rtx (compute_mode);
5049               if (!expand_twoval_binop_libfunc
5050                   (unsignedp ? udivmod_optab : sdivmod_optab,
5051                    op0, op1,
5052                    NULL_RTX, remainder,
5053                    unsignedp ? UMOD : MOD))
5054                 remainder = NULL_RTX;
5055             }
5056         }
5057       else
5058         {
5059           /* We divided.  Now finish doing X - Y * (X / Y).  */
5060           remainder = expand_mult (compute_mode, quotient, op1,
5061                                    NULL_RTX, unsignedp);
5062           remainder = expand_binop (compute_mode, sub_optab, op0,
5063                                     remainder, target, unsignedp,
5064                                     OPTAB_LIB_WIDEN);
5065         }
5066     }
5067
5068   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5069 }
5070 \f
5071 /* Return a tree node with data type TYPE, describing the value of X.
5072    Usually this is an VAR_DECL, if there is no obvious better choice.
5073    X may be an expression, however we only support those expressions
5074    generated by loop.c.  */
5075
5076 tree
5077 make_tree (tree type, rtx x)
5078 {
5079   tree t;
5080
5081   switch (GET_CODE (x))
5082     {
5083     case CONST_INT:
5084     case CONST_WIDE_INT:
5085       t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
5086       return t;
5087
5088     case CONST_DOUBLE:
5089       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5090       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5091         t = wide_int_to_tree (type,
5092                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5093                                                     HOST_BITS_PER_WIDE_INT * 2));
5094       else
5095         t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5096
5097       return t;
5098
5099     case CONST_VECTOR:
5100       {
5101         int units = CONST_VECTOR_NUNITS (x);
5102         tree itype = TREE_TYPE (type);
5103         tree *elts;
5104         int i;
5105
5106         /* Build a tree with vector elements.  */
5107         elts = XALLOCAVEC (tree, units);
5108         for (i = units - 1; i >= 0; --i)
5109           {
5110             rtx elt = CONST_VECTOR_ELT (x, i);
5111             elts[i] = make_tree (itype, elt);
5112           }
5113
5114         return build_vector (type, elts);
5115       }
5116
5117     case PLUS:
5118       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5119                           make_tree (type, XEXP (x, 1)));
5120
5121     case MINUS:
5122       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5123                           make_tree (type, XEXP (x, 1)));
5124
5125     case NEG:
5126       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5127
5128     case MULT:
5129       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5130                           make_tree (type, XEXP (x, 1)));
5131
5132     case ASHIFT:
5133       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5134                           make_tree (type, XEXP (x, 1)));
5135
5136     case LSHIFTRT:
5137       t = unsigned_type_for (type);
5138       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5139                                          make_tree (t, XEXP (x, 0)),
5140                                          make_tree (type, XEXP (x, 1))));
5141
5142     case ASHIFTRT:
5143       t = signed_type_for (type);
5144       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5145                                          make_tree (t, XEXP (x, 0)),
5146                                          make_tree (type, XEXP (x, 1))));
5147
5148     case DIV:
5149       if (TREE_CODE (type) != REAL_TYPE)
5150         t = signed_type_for (type);
5151       else
5152         t = type;
5153
5154       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5155                                          make_tree (t, XEXP (x, 0)),
5156                                          make_tree (t, XEXP (x, 1))));
5157     case UDIV:
5158       t = unsigned_type_for (type);
5159       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5160                                          make_tree (t, XEXP (x, 0)),
5161                                          make_tree (t, XEXP (x, 1))));
5162
5163     case SIGN_EXTEND:
5164     case ZERO_EXTEND:
5165       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5166                                           GET_CODE (x) == ZERO_EXTEND);
5167       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5168
5169     case CONST:
5170       return make_tree (type, XEXP (x, 0));
5171
5172     case SYMBOL_REF:
5173       t = SYMBOL_REF_DECL (x);
5174       if (t)
5175         return fold_convert (type, build_fold_addr_expr (t));
5176       /* else fall through.  */
5177
5178     default:
5179       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5180
5181       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5182          address mode to pointer mode.  */
5183       if (POINTER_TYPE_P (type))
5184         x = convert_memory_address_addr_space
5185               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5186
5187       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5188          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5189       t->decl_with_rtl.rtl = x;
5190
5191       return t;
5192     }
5193 }
5194 \f
5195 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5196    and returning TARGET.
5197
5198    If TARGET is 0, a pseudo-register or constant is returned.  */
5199
5200 rtx
5201 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5202 {
5203   rtx tem = 0;
5204
5205   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5206     tem = simplify_binary_operation (AND, mode, op0, op1);
5207   if (tem == 0)
5208     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5209
5210   if (target == 0)
5211     target = tem;
5212   else if (tem != target)
5213     emit_move_insn (target, tem);
5214   return target;
5215 }
5216
5217 /* Helper function for emit_store_flag.  */
5218 rtx
5219 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5220              machine_mode mode, machine_mode compare_mode,
5221              int unsignedp, rtx x, rtx y, int normalizep,
5222              machine_mode target_mode)
5223 {
5224   struct expand_operand ops[4];
5225   rtx op0, comparison, subtarget;
5226   rtx_insn *last;
5227   machine_mode result_mode = targetm.cstore_mode (icode);
5228
5229   last = get_last_insn ();
5230   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5231   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5232   if (!x || !y)
5233     {
5234       delete_insns_since (last);
5235       return NULL_RTX;
5236     }
5237
5238   if (target_mode == VOIDmode)
5239     target_mode = result_mode;
5240   if (!target)
5241     target = gen_reg_rtx (target_mode);
5242
5243   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5244
5245   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5246   create_fixed_operand (&ops[1], comparison);
5247   create_fixed_operand (&ops[2], x);
5248   create_fixed_operand (&ops[3], y);
5249   if (!maybe_expand_insn (icode, 4, ops))
5250     {
5251       delete_insns_since (last);
5252       return NULL_RTX;
5253     }
5254   subtarget = ops[0].value;
5255
5256   /* If we are converting to a wider mode, first convert to
5257      TARGET_MODE, then normalize.  This produces better combining
5258      opportunities on machines that have a SIGN_EXTRACT when we are
5259      testing a single bit.  This mostly benefits the 68k.
5260
5261      If STORE_FLAG_VALUE does not have the sign bit set when
5262      interpreted in MODE, we can do this conversion as unsigned, which
5263      is usually more efficient.  */
5264   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5265     {
5266       convert_move (target, subtarget,
5267                     val_signbit_known_clear_p (result_mode,
5268                                                STORE_FLAG_VALUE));
5269       op0 = target;
5270       result_mode = target_mode;
5271     }
5272   else
5273     op0 = subtarget;
5274
5275   /* If we want to keep subexpressions around, don't reuse our last
5276      target.  */
5277   if (optimize)
5278     subtarget = 0;
5279
5280   /* Now normalize to the proper value in MODE.  Sometimes we don't
5281      have to do anything.  */
5282   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5283     ;
5284   /* STORE_FLAG_VALUE might be the most negative number, so write
5285      the comparison this way to avoid a compiler-time warning.  */
5286   else if (- normalizep == STORE_FLAG_VALUE)
5287     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5288
5289   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5290      it hard to use a value of just the sign bit due to ANSI integer
5291      constant typing rules.  */
5292   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5293     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5294                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5295                         normalizep == 1);
5296   else
5297     {
5298       gcc_assert (STORE_FLAG_VALUE & 1);
5299
5300       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5301       if (normalizep == -1)
5302         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5303     }
5304
5305   /* If we were converting to a smaller mode, do the conversion now.  */
5306   if (target_mode != result_mode)
5307     {
5308       convert_move (target, op0, 0);
5309       return target;
5310     }
5311   else
5312     return op0;
5313 }
5314
5315
5316 /* A subroutine of emit_store_flag only including "tricks" that do not
5317    need a recursive call.  These are kept separate to avoid infinite
5318    loops.  */
5319
5320 static rtx
5321 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5322                    machine_mode mode, int unsignedp, int normalizep,
5323                    machine_mode target_mode)
5324 {
5325   rtx subtarget;
5326   enum insn_code icode;
5327   machine_mode compare_mode;
5328   enum mode_class mclass;
5329   enum rtx_code scode;
5330
5331   if (unsignedp)
5332     code = unsigned_condition (code);
5333   scode = swap_condition (code);
5334
5335   /* If one operand is constant, make it the second one.  Only do this
5336      if the other operand is not constant as well.  */
5337
5338   if (swap_commutative_operands_p (op0, op1))
5339     {
5340       std::swap (op0, op1);
5341       code = swap_condition (code);
5342     }
5343
5344   if (mode == VOIDmode)
5345     mode = GET_MODE (op0);
5346
5347   /* For some comparisons with 1 and -1, we can convert this to
5348      comparisons with zero.  This will often produce more opportunities for
5349      store-flag insns.  */
5350
5351   switch (code)
5352     {
5353     case LT:
5354       if (op1 == const1_rtx)
5355         op1 = const0_rtx, code = LE;
5356       break;
5357     case LE:
5358       if (op1 == constm1_rtx)
5359         op1 = const0_rtx, code = LT;
5360       break;
5361     case GE:
5362       if (op1 == const1_rtx)
5363         op1 = const0_rtx, code = GT;
5364       break;
5365     case GT:
5366       if (op1 == constm1_rtx)
5367         op1 = const0_rtx, code = GE;
5368       break;
5369     case GEU:
5370       if (op1 == const1_rtx)
5371         op1 = const0_rtx, code = NE;
5372       break;
5373     case LTU:
5374       if (op1 == const1_rtx)
5375         op1 = const0_rtx, code = EQ;
5376       break;
5377     default:
5378       break;
5379     }
5380
5381   /* If we are comparing a double-word integer with zero or -1, we can
5382      convert the comparison into one involving a single word.  */
5383   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5384       && GET_MODE_CLASS (mode) == MODE_INT
5385       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5386     {
5387       rtx tem;
5388       if ((code == EQ || code == NE)
5389           && (op1 == const0_rtx || op1 == constm1_rtx))
5390         {
5391           rtx op00, op01;
5392
5393           /* Do a logical OR or AND of the two words and compare the
5394              result.  */
5395           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5396           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5397           tem = expand_binop (word_mode,
5398                               op1 == const0_rtx ? ior_optab : and_optab,
5399                               op00, op01, NULL_RTX, unsignedp,
5400                               OPTAB_DIRECT);
5401
5402           if (tem != 0)
5403             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5404                                    unsignedp, normalizep);
5405         }
5406       else if ((code == LT || code == GE) && op1 == const0_rtx)
5407         {
5408           rtx op0h;
5409
5410           /* If testing the sign bit, can just test on high word.  */
5411           op0h = simplify_gen_subreg (word_mode, op0, mode,
5412                                       subreg_highpart_offset (word_mode,
5413                                                               mode));
5414           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5415                                  unsignedp, normalizep);
5416         }
5417       else
5418         tem = NULL_RTX;
5419
5420       if (tem)
5421         {
5422           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5423             return tem;
5424           if (!target)
5425             target = gen_reg_rtx (target_mode);
5426
5427           convert_move (target, tem,
5428                         !val_signbit_known_set_p (word_mode,
5429                                                   (normalizep ? normalizep
5430                                                    : STORE_FLAG_VALUE)));
5431           return target;
5432         }
5433     }
5434
5435   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5436      complement of A (for GE) and shifting the sign bit to the low bit.  */
5437   if (op1 == const0_rtx && (code == LT || code == GE)
5438       && GET_MODE_CLASS (mode) == MODE_INT
5439       && (normalizep || STORE_FLAG_VALUE == 1
5440           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5441     {
5442       subtarget = target;
5443
5444       if (!target)
5445         target_mode = mode;
5446
5447       /* If the result is to be wider than OP0, it is best to convert it
5448          first.  If it is to be narrower, it is *incorrect* to convert it
5449          first.  */
5450       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5451         {
5452           op0 = convert_modes (target_mode, mode, op0, 0);
5453           mode = target_mode;
5454         }
5455
5456       if (target_mode != mode)
5457         subtarget = 0;
5458
5459       if (code == GE)
5460         op0 = expand_unop (mode, one_cmpl_optab, op0,
5461                            ((STORE_FLAG_VALUE == 1 || normalizep)
5462                             ? 0 : subtarget), 0);
5463
5464       if (STORE_FLAG_VALUE == 1 || normalizep)
5465         /* If we are supposed to produce a 0/1 value, we want to do
5466            a logical shift from the sign bit to the low-order bit; for
5467            a -1/0 value, we do an arithmetic shift.  */
5468         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5469                             GET_MODE_BITSIZE (mode) - 1,
5470                             subtarget, normalizep != -1);
5471
5472       if (mode != target_mode)
5473         op0 = convert_modes (target_mode, mode, op0, 0);
5474
5475       return op0;
5476     }
5477
5478   mclass = GET_MODE_CLASS (mode);
5479   for (compare_mode = mode; compare_mode != VOIDmode;
5480        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5481     {
5482      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5483      icode = optab_handler (cstore_optab, optab_mode);
5484      if (icode != CODE_FOR_nothing)
5485         {
5486           do_pending_stack_adjust ();
5487           rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5488                                  unsignedp, op0, op1, normalizep, target_mode);
5489           if (tem)
5490             return tem;
5491
5492           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5493             {
5494               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5495                                  unsignedp, op1, op0, normalizep, target_mode);
5496               if (tem)
5497                 return tem;
5498             }
5499           break;
5500         }
5501     }
5502
5503   return 0;
5504 }
5505
5506 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5507    and storing in TARGET.  Normally return TARGET.
5508    Return 0 if that cannot be done.
5509
5510    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5511    it is VOIDmode, they cannot both be CONST_INT.
5512
5513    UNSIGNEDP is for the case where we have to widen the operands
5514    to perform the operation.  It says to use zero-extension.
5515
5516    NORMALIZEP is 1 if we should convert the result to be either zero
5517    or one.  Normalize is -1 if we should convert the result to be
5518    either zero or -1.  If NORMALIZEP is zero, the result will be left
5519    "raw" out of the scc insn.  */
5520
5521 rtx
5522 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5523                  machine_mode mode, int unsignedp, int normalizep)
5524 {
5525   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5526   enum rtx_code rcode;
5527   rtx subtarget;
5528   rtx tem, trueval;
5529   rtx_insn *last;
5530
5531   /* If we compare constants, we shouldn't use a store-flag operation,
5532      but a constant load.  We can get there via the vanilla route that
5533      usually generates a compare-branch sequence, but will in this case
5534      fold the comparison to a constant, and thus elide the branch.  */
5535   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5536     return NULL_RTX;
5537
5538   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5539                            target_mode);
5540   if (tem)
5541     return tem;
5542
5543   /* If we reached here, we can't do this with a scc insn, however there
5544      are some comparisons that can be done in other ways.  Don't do any
5545      of these cases if branches are very cheap.  */
5546   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5547     return 0;
5548
5549   /* See what we need to return.  We can only return a 1, -1, or the
5550      sign bit.  */
5551
5552   if (normalizep == 0)
5553     {
5554       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5555         normalizep = STORE_FLAG_VALUE;
5556
5557       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5558         ;
5559       else
5560         return 0;
5561     }
5562
5563   last = get_last_insn ();
5564
5565   /* If optimizing, use different pseudo registers for each insn, instead
5566      of reusing the same pseudo.  This leads to better CSE, but slows
5567      down the compiler, since there are more pseudos */
5568   subtarget = (!optimize
5569                && (target_mode == mode)) ? target : NULL_RTX;
5570   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5571
5572   /* For floating-point comparisons, try the reverse comparison or try
5573      changing the "orderedness" of the comparison.  */
5574   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5575     {
5576       enum rtx_code first_code;
5577       bool and_them;
5578
5579       rcode = reverse_condition_maybe_unordered (code);
5580       if (can_compare_p (rcode, mode, ccp_store_flag)
5581           && (code == ORDERED || code == UNORDERED
5582               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5583               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5584         {
5585           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5586                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5587
5588           /* For the reverse comparison, use either an addition or a XOR.  */
5589           if (want_add
5590               && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5591                            optimize_insn_for_speed_p ()) == 0)
5592             {
5593               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5594                                        STORE_FLAG_VALUE, target_mode);
5595               if (tem)
5596                 return expand_binop (target_mode, add_optab, tem,
5597                                      gen_int_mode (normalizep, target_mode),
5598                                      target, 0, OPTAB_WIDEN);
5599             }
5600           else if (!want_add
5601                    && rtx_cost (trueval, mode, XOR, 1,
5602                                 optimize_insn_for_speed_p ()) == 0)
5603             {
5604               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5605                                        normalizep, target_mode);
5606               if (tem)
5607                 return expand_binop (target_mode, xor_optab, tem, trueval,
5608                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5609             }
5610         }
5611
5612       delete_insns_since (last);
5613
5614       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5615       if (code == ORDERED || code == UNORDERED)
5616         return 0;
5617
5618       and_them = split_comparison (code, mode, &first_code, &code);
5619
5620       /* If there are no NaNs, the first comparison should always fall through.
5621          Effectively change the comparison to the other one.  */
5622       if (!HONOR_NANS (mode))
5623         {
5624           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5625           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5626                                     target_mode);
5627         }
5628
5629       if (!HAVE_conditional_move)
5630         return 0;
5631
5632       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5633          conditional move.  */
5634       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5635                                normalizep, target_mode);
5636       if (tem == 0)
5637         return 0;
5638
5639       if (and_them)
5640         tem = emit_conditional_move (target, code, op0, op1, mode,
5641                                      tem, const0_rtx, GET_MODE (tem), 0);
5642       else
5643         tem = emit_conditional_move (target, code, op0, op1, mode,
5644                                      trueval, tem, GET_MODE (tem), 0);
5645
5646       if (tem == 0)
5647         delete_insns_since (last);
5648       return tem;
5649     }
5650
5651   /* The remaining tricks only apply to integer comparisons.  */
5652
5653   if (GET_MODE_CLASS (mode) != MODE_INT)
5654     return 0;
5655
5656   /* If this is an equality comparison of integers, we can try to exclusive-or
5657      (or subtract) the two operands and use a recursive call to try the
5658      comparison with zero.  Don't do any of these cases if branches are
5659      very cheap.  */
5660
5661   if ((code == EQ || code == NE) && op1 != const0_rtx)
5662     {
5663       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5664                           OPTAB_WIDEN);
5665
5666       if (tem == 0)
5667         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5668                             OPTAB_WIDEN);
5669       if (tem != 0)
5670         tem = emit_store_flag (target, code, tem, const0_rtx,
5671                                mode, unsignedp, normalizep);
5672       if (tem != 0)
5673         return tem;
5674
5675       delete_insns_since (last);
5676     }
5677
5678   /* For integer comparisons, try the reverse comparison.  However, for
5679      small X and if we'd have anyway to extend, implementing "X != 0"
5680      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5681   rcode = reverse_condition (code);
5682   if (can_compare_p (rcode, mode, ccp_store_flag)
5683       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5684             && code == NE
5685             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5686             && op1 == const0_rtx))
5687     {
5688       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5689                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5690
5691       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5692       if (want_add
5693           && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5694                        optimize_insn_for_speed_p ()) == 0)
5695         {
5696           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5697                                    STORE_FLAG_VALUE, target_mode);
5698           if (tem != 0)
5699             tem = expand_binop (target_mode, add_optab, tem,
5700                                 gen_int_mode (normalizep, target_mode),
5701                                 target, 0, OPTAB_WIDEN);
5702         }
5703       else if (!want_add
5704                && rtx_cost (trueval, mode, XOR, 1,
5705                             optimize_insn_for_speed_p ()) == 0)
5706         {
5707           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5708                                    normalizep, target_mode);
5709           if (tem != 0)
5710             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5711                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5712         }
5713
5714       if (tem != 0)
5715         return tem;
5716       delete_insns_since (last);
5717     }
5718
5719   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5720      the constant zero.  Reject all other comparisons at this point.  Only
5721      do LE and GT if branches are expensive since they are expensive on
5722      2-operand machines.  */
5723
5724   if (op1 != const0_rtx
5725       || (code != EQ && code != NE
5726           && (BRANCH_COST (optimize_insn_for_speed_p (),
5727                            false) <= 1 || (code != LE && code != GT))))
5728     return 0;
5729
5730   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5731      do the necessary operation below.  */
5732
5733   tem = 0;
5734
5735   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5736      the sign bit set.  */
5737
5738   if (code == LE)
5739     {
5740       /* This is destructive, so SUBTARGET can't be OP0.  */
5741       if (rtx_equal_p (subtarget, op0))
5742         subtarget = 0;
5743
5744       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5745                           OPTAB_WIDEN);
5746       if (tem)
5747         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5748                             OPTAB_WIDEN);
5749     }
5750
5751   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5752      number of bits in the mode of OP0, minus one.  */
5753
5754   if (code == GT)
5755     {
5756       if (rtx_equal_p (subtarget, op0))
5757         subtarget = 0;
5758
5759       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5760                           GET_MODE_BITSIZE (mode) - 1,
5761                           subtarget, 0);
5762       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5763                           OPTAB_WIDEN);
5764     }
5765
5766   if (code == EQ || code == NE)
5767     {
5768       /* For EQ or NE, one way to do the comparison is to apply an operation
5769          that converts the operand into a positive number if it is nonzero
5770          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5771          for NE we negate.  This puts the result in the sign bit.  Then we
5772          normalize with a shift, if needed.
5773
5774          Two operations that can do the above actions are ABS and FFS, so try
5775          them.  If that doesn't work, and MODE is smaller than a full word,
5776          we can use zero-extension to the wider mode (an unsigned conversion)
5777          as the operation.  */
5778
5779       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5780          that is compensated by the subsequent overflow when subtracting
5781          one / negating.  */
5782
5783       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5784         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5785       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5786         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5787       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5788         {
5789           tem = convert_modes (word_mode, mode, op0, 1);
5790           mode = word_mode;
5791         }
5792
5793       if (tem != 0)
5794         {
5795           if (code == EQ)
5796             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5797                                 0, OPTAB_WIDEN);
5798           else
5799             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5800         }
5801
5802       /* If we couldn't do it that way, for NE we can "or" the two's complement
5803          of the value with itself.  For EQ, we take the one's complement of
5804          that "or", which is an extra insn, so we only handle EQ if branches
5805          are expensive.  */
5806
5807       if (tem == 0
5808           && (code == NE
5809               || BRANCH_COST (optimize_insn_for_speed_p (),
5810                               false) > 1))
5811         {
5812           if (rtx_equal_p (subtarget, op0))
5813             subtarget = 0;
5814
5815           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5816           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5817                               OPTAB_WIDEN);
5818
5819           if (tem && code == EQ)
5820             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5821         }
5822     }
5823
5824   if (tem && normalizep)
5825     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5826                         GET_MODE_BITSIZE (mode) - 1,
5827                         subtarget, normalizep == 1);
5828
5829   if (tem)
5830     {
5831       if (!target)
5832         ;
5833       else if (GET_MODE (tem) != target_mode)
5834         {
5835           convert_move (target, tem, 0);
5836           tem = target;
5837         }
5838       else if (!subtarget)
5839         {
5840           emit_move_insn (target, tem);
5841           tem = target;
5842         }
5843     }
5844   else
5845     delete_insns_since (last);
5846
5847   return tem;
5848 }
5849
5850 /* Like emit_store_flag, but always succeeds.  */
5851
5852 rtx
5853 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5854                        machine_mode mode, int unsignedp, int normalizep)
5855 {
5856   rtx tem;
5857   rtx_code_label *label;
5858   rtx trueval, falseval;
5859
5860   /* First see if emit_store_flag can do the job.  */
5861   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5862   if (tem != 0)
5863     return tem;
5864
5865   if (!target)
5866     target = gen_reg_rtx (word_mode);
5867
5868   /* If this failed, we have to do this with set/compare/jump/set code.
5869      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5870   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5871   if (code == NE
5872       && GET_MODE_CLASS (mode) == MODE_INT
5873       && REG_P (target)
5874       && op0 == target
5875       && op1 == const0_rtx)
5876     {
5877       label = gen_label_rtx ();
5878       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5879                                NULL_RTX, NULL, label, -1);
5880       emit_move_insn (target, trueval);
5881       emit_label (label);
5882       return target;
5883     }
5884
5885   if (!REG_P (target)
5886       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5887     target = gen_reg_rtx (GET_MODE (target));
5888
5889   /* Jump in the right direction if the target cannot implement CODE
5890      but can jump on its reverse condition.  */
5891   falseval = const0_rtx;
5892   if (! can_compare_p (code, mode, ccp_jump)
5893       && (! FLOAT_MODE_P (mode)
5894           || code == ORDERED || code == UNORDERED
5895           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5896           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5897     {
5898       enum rtx_code rcode;
5899       if (FLOAT_MODE_P (mode))
5900         rcode = reverse_condition_maybe_unordered (code);
5901       else
5902         rcode = reverse_condition (code);
5903
5904       /* Canonicalize to UNORDERED for the libcall.  */
5905       if (can_compare_p (rcode, mode, ccp_jump)
5906           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5907         {
5908           falseval = trueval;
5909           trueval = const0_rtx;
5910           code = rcode;
5911         }
5912     }
5913
5914   emit_move_insn (target, trueval);
5915   label = gen_label_rtx ();
5916   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5917                            label, -1);
5918
5919   emit_move_insn (target, falseval);
5920   emit_label (label);
5921
5922   return target;
5923 }
5924 \f
5925 /* Perform possibly multi-word comparison and conditional jump to LABEL
5926    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5927    now a thin wrapper around do_compare_rtx_and_jump.  */
5928
5929 static void
5930 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5931                  rtx_code_label *label)
5932 {
5933   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5934   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
5935                            NULL, label, -1);
5936 }